From 1b7558cd7d3ba68a0d6317a7505c182dc76adae0 Mon Sep 17 00:00:00 2001 From: Serge Smertin <259697+nfx@users.noreply.github.com> Date: Fri, 17 Nov 2023 13:47:37 +0100 Subject: [PATCH] Add `databricks labs` command group (#914) ## Command group image ## Installed versions image ## Project commands image ## Installer hook ![image](https://github.com/databricks/cli/assets/259697/3ce0d355-039a-445f-bff7-6dfc1a2e3288) ## Update notifications ![image](https://github.com/databricks/cli/assets/259697/10724627-3606-49e1-9722-00ae37afed12) # Downstream work - https://github.com/databrickslabs/ucx/pull/517 - https://github.com/databrickslabs/dlt-meta/pull/19 - https://github.com/databrickslabs/discoverx/pull/84 --- cmd/cmd.go | 2 + cmd/labs/CODEOWNERS | 1 + cmd/labs/clear_cache.go | 33 ++ cmd/labs/github/github.go | 66 +++ cmd/labs/github/ref.go | 20 + cmd/labs/github/ref_test.go | 48 ++ cmd/labs/github/releases.go | 61 +++ cmd/labs/github/releases_test.go | 34 ++ cmd/labs/github/repositories.go | 59 +++ cmd/labs/github/repositories_test.go | 30 ++ cmd/labs/install.go | 21 + cmd/labs/installed.go | 57 +++ cmd/labs/installed_test.go | 19 + cmd/labs/labs.go | 39 ++ cmd/labs/list.go | 62 +++ cmd/labs/list_test.go | 19 + cmd/labs/localcache/jsonfile.go | 109 +++++ cmd/labs/localcache/jsonfile_test.go | 132 ++++++ cmd/labs/project/command_test.go | 69 +++ cmd/labs/project/entrypoint.go | 250 +++++++++++ cmd/labs/project/fetcher.go | 141 ++++++ cmd/labs/project/helpers.go | 35 ++ cmd/labs/project/init_test.go | 13 + cmd/labs/project/installed.go | 58 +++ cmd/labs/project/installed_test.go | 19 + cmd/labs/project/installer.go | 286 ++++++++++++ cmd/labs/project/installer_test.go | 415 ++++++++++++++++++ cmd/labs/project/login.go | 117 +++++ cmd/labs/project/project.go | 352 +++++++++++++++ cmd/labs/project/project_test.go | 22 + cmd/labs/project/proxy.go | 146 ++++++ cmd/labs/project/schema.json | 126 ++++++ cmd/labs/project/testdata/.gitignore | 1 + .../databrickslabs-blueprint-releases.json | 8 + .../labs/blueprint/config/login.json | 4 + .../.databricks/labs/blueprint/lib/install.py | 1 + .../.databricks/labs/blueprint/lib/labs.yml | 33 ++ .../.databricks/labs/blueprint/lib/main.py | 27 ++ .../labs/blueprint/lib/pyproject.toml | 11 + .../blueprint/state/other-state-file.json | 1 + .../labs/blueprint/state/venv/pyvenv.cfg | 0 .../labs/blueprint/state/version.json | 4 + .../labs/databrickslabs-repositories.json | 37 ++ .../testdata/installed-in-home/.databrickscfg | 9 + cmd/labs/show.go | 57 +++ cmd/labs/uninstall.go | 39 ++ cmd/labs/unpack/zipball.go | 64 +++ cmd/labs/upgrade.go | 21 + 48 files changed, 3178 insertions(+) create mode 100644 cmd/labs/CODEOWNERS create mode 100644 cmd/labs/clear_cache.go create mode 100644 cmd/labs/github/github.go create mode 100644 cmd/labs/github/ref.go create mode 100644 cmd/labs/github/ref_test.go create mode 100644 cmd/labs/github/releases.go create mode 100644 cmd/labs/github/releases_test.go create mode 100644 cmd/labs/github/repositories.go create mode 100644 cmd/labs/github/repositories_test.go create mode 100644 cmd/labs/install.go create mode 100644 cmd/labs/installed.go create mode 100644 cmd/labs/installed_test.go create mode 100644 cmd/labs/labs.go create mode 100644 cmd/labs/list.go create mode 100644 cmd/labs/list_test.go create mode 100644 cmd/labs/localcache/jsonfile.go create mode 100644 cmd/labs/localcache/jsonfile_test.go create mode 100644 cmd/labs/project/command_test.go create mode 100644 cmd/labs/project/entrypoint.go create mode 100644 
cmd/labs/project/fetcher.go create mode 100644 cmd/labs/project/helpers.go create mode 100644 cmd/labs/project/init_test.go create mode 100644 cmd/labs/project/installed.go create mode 100644 cmd/labs/project/installed_test.go create mode 100644 cmd/labs/project/installer.go create mode 100644 cmd/labs/project/installer_test.go create mode 100644 cmd/labs/project/login.go create mode 100644 cmd/labs/project/project.go create mode 100644 cmd/labs/project/project_test.go create mode 100644 cmd/labs/project/proxy.go create mode 100644 cmd/labs/project/schema.json create mode 100644 cmd/labs/project/testdata/.gitignore create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/cache/databrickslabs-blueprint-releases.json create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/config/login.json create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/pyproject.toml create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/other-state-file.json create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/venv/pyvenv.cfg create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/version.json create mode 100644 cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-repositories.json create mode 100644 cmd/labs/project/testdata/installed-in-home/.databrickscfg create mode 100644 cmd/labs/show.go create mode 100644 cmd/labs/uninstall.go create mode 100644 cmd/labs/unpack/zipball.go create mode 100644 cmd/labs/upgrade.go diff --git a/cmd/cmd.go b/cmd/cmd.go index 6dd0f6e2..5d835409 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -10,6 +10,7 @@ import ( "github.com/databricks/cli/cmd/bundle" "github.com/databricks/cli/cmd/configure" "github.com/databricks/cli/cmd/fs" + "github.com/databricks/cli/cmd/labs" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/cmd/sync" "github.com/databricks/cli/cmd/version" @@ -70,6 +71,7 @@ func New(ctx context.Context) *cobra.Command { cli.AddCommand(bundle.New()) cli.AddCommand(configure.New()) cli.AddCommand(fs.New()) + cli.AddCommand(labs.New(ctx)) cli.AddCommand(sync.New()) cli.AddCommand(version.New()) diff --git a/cmd/labs/CODEOWNERS b/cmd/labs/CODEOWNERS new file mode 100644 index 00000000..cc93a75e --- /dev/null +++ b/cmd/labs/CODEOWNERS @@ -0,0 +1 @@ +* @nfx diff --git a/cmd/labs/clear_cache.go b/cmd/labs/clear_cache.go new file mode 100644 index 00000000..e2f531cf --- /dev/null +++ b/cmd/labs/clear_cache.go @@ -0,0 +1,33 @@ +package labs + +import ( + "log/slog" + "os" + + "github.com/databricks/cli/cmd/labs/project" + "github.com/databricks/cli/libs/log" + "github.com/spf13/cobra" +) + +func newClearCacheCommand() *cobra.Command { + return &cobra.Command{ + Use: "clear-cache", + Short: "Clears cache entries from everywhere relevant", + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + projects, err := project.Installed(ctx) + if err != nil { + return err + } + _ = os.Remove(project.PathInLabs(ctx, "databrickslabs-repositories.json")) + logger := log.GetLogger(ctx) + for _, prj := range 
projects { + logger.Info("clearing labs project cache", slog.String("name", prj.Name)) + _ = os.RemoveAll(prj.CacheDir(ctx)) + // recreating empty cache folder for downstream apps to work normally + _ = prj.EnsureFoldersExist(ctx) + } + return nil + }, + } +} diff --git a/cmd/labs/github/github.go b/cmd/labs/github/github.go new file mode 100644 index 00000000..1dd9fae5 --- /dev/null +++ b/cmd/labs/github/github.go @@ -0,0 +1,66 @@ +package github + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io" + "net/http" + "strings" + + "github.com/databricks/cli/libs/log" +) + +const gitHubAPI = "https://api.github.com" +const gitHubUserContent = "https://raw.githubusercontent.com" + +// Placeholders to use as unique keys in context.Context. +var apiOverride int +var userContentOverride int + +func WithApiOverride(ctx context.Context, override string) context.Context { + return context.WithValue(ctx, &apiOverride, override) +} + +func WithUserContentOverride(ctx context.Context, override string) context.Context { + return context.WithValue(ctx, &userContentOverride, override) +} + +var ErrNotFound = errors.New("not found") + +func getBytes(ctx context.Context, method, url string, body io.Reader) ([]byte, error) { + ao, ok := ctx.Value(&apiOverride).(string) + if ok { + url = strings.Replace(url, gitHubAPI, ao, 1) + } + uco, ok := ctx.Value(&userContentOverride).(string) + if ok { + url = strings.Replace(url, gitHubUserContent, uco, 1) + } + log.Tracef(ctx, "%s %s", method, url) + req, err := http.NewRequestWithContext(ctx, "GET", url, body) + if err != nil { + return nil, err + } + res, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + if res.StatusCode == 404 { + return nil, ErrNotFound + } + if res.StatusCode >= 400 { + return nil, fmt.Errorf("github request failed: %s", res.Status) + } + defer res.Body.Close() + return io.ReadAll(res.Body) +} + +func httpGetAndUnmarshal(ctx context.Context, url string, response any) error { + raw, err := getBytes(ctx, "GET", url, nil) + if err != nil { + return err + } + return json.Unmarshal(raw, response) +} diff --git a/cmd/labs/github/ref.go b/cmd/labs/github/ref.go new file mode 100644 index 00000000..1975f6fb --- /dev/null +++ b/cmd/labs/github/ref.go @@ -0,0 +1,20 @@ +package github + +import ( + "context" + "fmt" + + "github.com/databricks/cli/libs/log" +) + +func ReadFileFromRef(ctx context.Context, org, repo, ref, file string) ([]byte, error) { + log.Debugf(ctx, "Reading %s@%s from %s/%s", file, ref, org, repo) + url := fmt.Sprintf("%s/%s/%s/%s/%s", gitHubUserContent, org, repo, ref, file) + return getBytes(ctx, "GET", url, nil) +} + +func DownloadZipball(ctx context.Context, org, repo, ref string) ([]byte, error) { + log.Debugf(ctx, "Downloading zipball for %s from %s/%s", ref, org, repo) + zipballURL := fmt.Sprintf("%s/repos/%s/%s/zipball/%s", gitHubAPI, org, repo, ref) + return getBytes(ctx, "GET", zipballURL, nil) +} diff --git a/cmd/labs/github/ref_test.go b/cmd/labs/github/ref_test.go new file mode 100644 index 00000000..2a9ffcc5 --- /dev/null +++ b/cmd/labs/github/ref_test.go @@ -0,0 +1,48 @@ +package github + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFileFromRef(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/databrickslabs/ucx/main/README.md" { + w.Write([]byte(`abc`)) + return + } + t.Logf("Requested: %s", r.URL.Path) + panic("stub 
required") + })) + defer server.Close() + + ctx := context.Background() + ctx = WithUserContentOverride(ctx, server.URL) + + raw, err := ReadFileFromRef(ctx, "databrickslabs", "ucx", "main", "README.md") + assert.NoError(t, err) + assert.Equal(t, []byte("abc"), raw) +} + +func TestDownloadZipball(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/repos/databrickslabs/ucx/zipball/main" { + w.Write([]byte(`abc`)) + return + } + t.Logf("Requested: %s", r.URL.Path) + panic("stub required") + })) + defer server.Close() + + ctx := context.Background() + ctx = WithApiOverride(ctx, server.URL) + + raw, err := DownloadZipball(ctx, "databrickslabs", "ucx", "main") + assert.NoError(t, err) + assert.Equal(t, []byte("abc"), raw) +} diff --git a/cmd/labs/github/releases.go b/cmd/labs/github/releases.go new file mode 100644 index 00000000..0dae0317 --- /dev/null +++ b/cmd/labs/github/releases.go @@ -0,0 +1,61 @@ +package github + +import ( + "context" + "fmt" + "time" + + "github.com/databricks/cli/cmd/labs/localcache" + "github.com/databricks/cli/libs/log" +) + +const cacheTTL = 1 * time.Hour + +// NewReleaseCache creates a release cache for a repository in the GitHub org. +// Caller has to provide different cache directories for different repositories. +func NewReleaseCache(org, repo, cacheDir string) *ReleaseCache { + pattern := fmt.Sprintf("%s-%s-releases", org, repo) + return &ReleaseCache{ + cache: localcache.NewLocalCache[Versions](cacheDir, pattern, cacheTTL), + Org: org, + Repo: repo, + } +} + +type ReleaseCache struct { + cache localcache.LocalCache[Versions] + Org string + Repo string +} + +func (r *ReleaseCache) Load(ctx context.Context) (Versions, error) { + return r.cache.Load(ctx, func() (Versions, error) { + return getVersions(ctx, r.Org, r.Repo) + }) +} + +// getVersions is considered to be a private API, as we want the usage go through a cache +func getVersions(ctx context.Context, org, repo string) (Versions, error) { + var releases Versions + log.Debugf(ctx, "Fetching latest releases for %s/%s from GitHub API", org, repo) + url := fmt.Sprintf("%s/repos/%s/%s/releases", gitHubAPI, org, repo) + err := httpGetAndUnmarshal(ctx, url, &releases) + return releases, err +} + +type ghAsset struct { + Name string `json:"name"` + ContentType string `json:"content_type"` + Size int `json:"size"` + BrowserDownloadURL string `json:"browser_download_url"` +} + +type Release struct { + Version string `json:"tag_name"` + CreatedAt time.Time `json:"created_at"` + PublishedAt time.Time `json:"published_at"` + ZipballURL string `json:"zipball_url"` + Assets []ghAsset `json:"assets"` +} + +type Versions []Release diff --git a/cmd/labs/github/releases_test.go b/cmd/labs/github/releases_test.go new file mode 100644 index 00000000..ea24a1e2 --- /dev/null +++ b/cmd/labs/github/releases_test.go @@ -0,0 +1,34 @@ +package github + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestLoadsReleasesForCLI(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/repos/databricks/cli/releases" { + w.Write([]byte(`[{"tag_name": "v1.2.3"}, {"tag_name": "v1.2.2"}]`)) + return + } + t.Logf("Requested: %s", r.URL.Path) + panic("stub required") + })) + defer server.Close() + + ctx := context.Background() + ctx = WithApiOverride(ctx, server.URL) + + r := NewReleaseCache("databricks", "cli", 
t.TempDir()) + all, err := r.Load(ctx) + assert.NoError(t, err) + assert.Len(t, all, 2) + + // no call is made + _, err = r.Load(ctx) + assert.NoError(t, err) +} diff --git a/cmd/labs/github/repositories.go b/cmd/labs/github/repositories.go new file mode 100644 index 00000000..850cdb1c --- /dev/null +++ b/cmd/labs/github/repositories.go @@ -0,0 +1,59 @@ +package github + +import ( + "context" + "fmt" + "time" + + "github.com/databricks/cli/cmd/labs/localcache" + "github.com/databricks/cli/libs/log" +) + +const repositoryCacheTTL = 24 * time.Hour + +func NewRepositoryCache(org, cacheDir string) *repositoryCache { + filename := fmt.Sprintf("%s-repositories", org) + return &repositoryCache{ + cache: localcache.NewLocalCache[Repositories](cacheDir, filename, repositoryCacheTTL), + Org: org, + } +} + +type repositoryCache struct { + cache localcache.LocalCache[Repositories] + Org string +} + +func (r *repositoryCache) Load(ctx context.Context) (Repositories, error) { + return r.cache.Load(ctx, func() (Repositories, error) { + return getRepositories(ctx, r.Org) + }) +} + +// getRepositories is considered to be privata API, as we want the usage to go through a cache +func getRepositories(ctx context.Context, org string) (Repositories, error) { + var repos Repositories + log.Debugf(ctx, "Loading repositories for %s from GitHub API", org) + url := fmt.Sprintf("%s/users/%s/repos", gitHubAPI, org) + err := httpGetAndUnmarshal(ctx, url, &repos) + return repos, err +} + +type Repositories []ghRepo + +type ghRepo struct { + Name string `json:"name"` + Description string `json:"description"` + Langauge string `json:"language"` + DefaultBranch string `json:"default_branch"` + Stars int `json:"stargazers_count"` + IsFork bool `json:"fork"` + IsArchived bool `json:"archived"` + Topics []string `json:"topics"` + HtmlURL string `json:"html_url"` + CloneURL string `json:"clone_url"` + SshURL string `json:"ssh_url"` + License struct { + Name string `json:"name"` + } `json:"license"` +} diff --git a/cmd/labs/github/repositories_test.go b/cmd/labs/github/repositories_test.go new file mode 100644 index 00000000..4f2fef3e --- /dev/null +++ b/cmd/labs/github/repositories_test.go @@ -0,0 +1,30 @@ +package github + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestRepositories(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/users/databrickslabs/repos" { + w.Write([]byte(`[{"name": "x"}]`)) + return + } + t.Logf("Requested: %s", r.URL.Path) + panic("stub required") + })) + defer server.Close() + + ctx := context.Background() + ctx = WithApiOverride(ctx, server.URL) + + r := NewRepositoryCache("databrickslabs", t.TempDir()) + all, err := r.Load(ctx) + assert.NoError(t, err) + assert.True(t, len(all) > 0) +} diff --git a/cmd/labs/install.go b/cmd/labs/install.go new file mode 100644 index 00000000..31db4389 --- /dev/null +++ b/cmd/labs/install.go @@ -0,0 +1,21 @@ +package labs + +import ( + "github.com/databricks/cli/cmd/labs/project" + "github.com/spf13/cobra" +) + +func newInstallCommand() *cobra.Command { + return &cobra.Command{ + Use: "install NAME", + Args: cobra.ExactArgs(1), + Short: "Installs project", + RunE: func(cmd *cobra.Command, args []string) error { + inst, err := project.NewInstaller(cmd, args[0]) + if err != nil { + return err + } + return inst.Install(cmd.Context()) + }, + } +} diff --git a/cmd/labs/installed.go b/cmd/labs/installed.go new file 
mode 100644 index 00000000..e4249c9f --- /dev/null +++ b/cmd/labs/installed.go @@ -0,0 +1,57 @@ +package labs + +import ( + "fmt" + + "github.com/databricks/cli/cmd/labs/project" + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" +) + +func newInstalledCommand() *cobra.Command { + return &cobra.Command{ + Use: "installed", + Short: "List all installed labs", + Annotations: map[string]string{ + "template": cmdio.Heredoc(` + Name Description Version + {{range .Projects}}{{.Name}} {{.Description}} {{.Version}} + {{end}} + `), + }, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + type installedProject struct { + Name string `json:"name"` + Description string `json:"description"` + Version string `json:"version"` + } + projects, err := project.Installed(ctx) + if err != nil { + return err + } + var info struct { + Projects []installedProject `json:"projects"` + } + for _, v := range projects { + description := v.Description + if len(description) > 50 { + description = description[:50] + "..." + } + version, err := v.InstalledVersion(ctx) + if err != nil { + return fmt.Errorf("%s: %w", v.Name, err) + } + info.Projects = append(info.Projects, installedProject{ + Name: v.Name, + Description: description, + Version: version.Version, + }) + } + if len(info.Projects) == 0 { + return fmt.Errorf("no projects installed") + } + return cmdio.Render(ctx, info) + }, + } +} diff --git a/cmd/labs/installed_test.go b/cmd/labs/installed_test.go new file mode 100644 index 00000000..00692f79 --- /dev/null +++ b/cmd/labs/installed_test.go @@ -0,0 +1,19 @@ +package labs_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/internal" + "github.com/databricks/cli/libs/env" +) + +func TestListsInstalledProjects(t *testing.T) { + ctx := context.Background() + ctx = env.WithUserHomeDir(ctx, "project/testdata/installed-in-home") + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "installed") + r.RunAndExpectOutput(` + Name Description Version + blueprint Blueprint Project v0.3.15 + `) +} diff --git a/cmd/labs/labs.go b/cmd/labs/labs.go new file mode 100644 index 00000000..cccf8ac4 --- /dev/null +++ b/cmd/labs/labs.go @@ -0,0 +1,39 @@ +package labs + +import ( + "context" + + "github.com/databricks/cli/cmd/labs/project" + "github.com/spf13/cobra" +) + +func New(ctx context.Context) *cobra.Command { + cmd := &cobra.Command{ + Use: "labs", + Short: "Manage Databricks Labs installations", + Long: `Manage experimental Databricks Labs apps`, + } + + cmd.AddGroup(&cobra.Group{ + ID: "labs", + Title: "Installed Databricks Labs", + }) + + cmd.AddCommand( + newListCommand(), + newInstallCommand(), + newUpgradeCommand(), + newInstalledCommand(), + newClearCacheCommand(), + newUninstallCommand(), + newShowCommand(), + ) + all, err := project.Installed(ctx) + if err != nil { + panic(err) + } + for _, v := range all { + v.Register(cmd) + } + return cmd +} diff --git a/cmd/labs/list.go b/cmd/labs/list.go new file mode 100644 index 00000000..07cc180c --- /dev/null +++ b/cmd/labs/list.go @@ -0,0 +1,62 @@ +package labs + +import ( + "context" + + "github.com/databricks/cli/cmd/labs/github" + "github.com/databricks/cli/cmd/labs/project" + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" +) + +type labsMeta struct { + Name string `json:"name"` + Description string `json:"description"` + License string `json:"license"` +} + +func allRepos(ctx context.Context) (github.Repositories, error) { + cacheDir := project.PathInLabs(ctx) + cache := 
github.NewRepositoryCache("databrickslabs", cacheDir) + return cache.Load(ctx) +} + +func newListCommand() *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List all labs", + Annotations: map[string]string{ + "template": cmdio.Heredoc(` + Name Description + {{range .}}{{.Name}} {{.Description}} + {{end}} + `), + }, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + repositories, err := allRepos(ctx) + if err != nil { + return err + } + info := []labsMeta{} + for _, v := range repositories { + if v.IsArchived { + continue + } + if v.IsFork { + continue + } + description := v.Description + if len(description) > 50 { + description = description[:50] + "..." + } + info = append(info, labsMeta{ + Name: v.Name, + Description: description, + License: v.License.Name, + }) + } + return cmdio.Render(ctx, info) + }, + } +} diff --git a/cmd/labs/list_test.go b/cmd/labs/list_test.go new file mode 100644 index 00000000..925b984a --- /dev/null +++ b/cmd/labs/list_test.go @@ -0,0 +1,19 @@ +package labs_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/internal" + "github.com/databricks/cli/libs/env" + "github.com/stretchr/testify/require" +) + +func TestListingWorks(t *testing.T) { + ctx := context.Background() + ctx = env.WithUserHomeDir(ctx, "project/testdata/installed-in-home") + c := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "list") + stdout, _, err := c.Run() + require.NoError(t, err) + require.Contains(t, stdout.String(), "ucx") +} diff --git a/cmd/labs/localcache/jsonfile.go b/cmd/labs/localcache/jsonfile.go new file mode 100644 index 00000000..495743a5 --- /dev/null +++ b/cmd/labs/localcache/jsonfile.go @@ -0,0 +1,109 @@ +package localcache + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io/fs" + "net/url" + "os" + "path/filepath" + "time" + + "github.com/databricks/cli/libs/log" +) + +const userRW = 0o600 +const ownerRWXworldRX = 0o755 + +func NewLocalCache[T any](dir, name string, validity time.Duration) LocalCache[T] { + return LocalCache[T]{ + dir: dir, + name: name, + validity: validity, + } +} + +type LocalCache[T any] struct { + name string + dir string + validity time.Duration + zero T +} + +func (r *LocalCache[T]) Load(ctx context.Context, refresh func() (T, error)) (T, error) { + cached, err := r.loadCache() + if errors.Is(err, fs.ErrNotExist) { + return r.refreshCache(ctx, refresh, r.zero) + } else if err != nil { + return r.zero, err + } else if time.Since(cached.Refreshed) > r.validity { + return r.refreshCache(ctx, refresh, cached.Data) + } + return cached.Data, nil +} + +type cached[T any] struct { + // we don't use mtime of the file because it's easier to + // for testdata used in the unit tests to be somewhere far + // in the future and don't bother about switching the mtime bit. + Refreshed time.Time `json:"refreshed_at"` + Data T `json:"data"` +} + +func (r *LocalCache[T]) refreshCache(ctx context.Context, refresh func() (T, error), offlineVal T) (T, error) { + data, err := refresh() + var urlError *url.Error + if errors.As(err, &urlError) { + log.Warnf(ctx, "System offline. 
Cannot refresh cache: %s", urlError) + return offlineVal, nil + } + if err != nil { + return r.zero, fmt.Errorf("refresh: %w", err) + } + return r.writeCache(ctx, data) +} + +func (r *LocalCache[T]) writeCache(ctx context.Context, data T) (T, error) { + cached := &cached[T]{time.Now(), data} + raw, err := json.MarshalIndent(cached, "", " ") + if err != nil { + return r.zero, fmt.Errorf("json marshal: %w", err) + } + cacheFile := r.FileName() + err = os.WriteFile(cacheFile, raw, userRW) + if errors.Is(err, fs.ErrNotExist) { + cacheDir := filepath.Dir(cacheFile) + err := os.MkdirAll(cacheDir, ownerRWXworldRX) + if err != nil { + return r.zero, fmt.Errorf("create %s: %w", cacheDir, err) + } + err = os.WriteFile(cacheFile, raw, userRW) + if err != nil { + return r.zero, fmt.Errorf("retry save cache: %w", err) + } + return data, nil + } else if err != nil { + return r.zero, fmt.Errorf("save cache: %w", err) + } + return data, nil +} + +func (r *LocalCache[T]) FileName() string { + return filepath.Join(r.dir, fmt.Sprintf("%s.json", r.name)) +} + +func (r *LocalCache[T]) loadCache() (*cached[T], error) { + jsonFile := r.FileName() + raw, err := os.ReadFile(r.FileName()) + if err != nil { + return nil, fmt.Errorf("read %s: %w", jsonFile, err) + } + var v cached[T] + err = json.Unmarshal(raw, &v) + if err != nil { + return nil, fmt.Errorf("parse %s: %w", jsonFile, err) + } + return &v, nil +} diff --git a/cmd/labs/localcache/jsonfile_test.go b/cmd/labs/localcache/jsonfile_test.go new file mode 100644 index 00000000..0d852174 --- /dev/null +++ b/cmd/labs/localcache/jsonfile_test.go @@ -0,0 +1,132 @@ +package localcache + +import ( + "context" + "errors" + "fmt" + "net/url" + "runtime" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestCreatesDirectoryIfNeeded(t *testing.T) { + ctx := context.Background() + c := NewLocalCache[int64](t.TempDir(), "some/nested/file", 1*time.Minute) + thing := []int64{0} + tick := func() (int64, error) { + thing[0] += 1 + return thing[0], nil + } + first, err := c.Load(ctx, tick) + assert.NoError(t, err) + assert.Equal(t, first, int64(1)) +} + +func TestImpossibleToCreateDir(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("No /dev/null on windows") + } + ctx := context.Background() + c := NewLocalCache[int64]("/dev/null", "some/nested/file", 1*time.Minute) + thing := []int64{0} + tick := func() (int64, error) { + thing[0] += 1 + return thing[0], nil + } + _, err := c.Load(ctx, tick) + assert.Error(t, err) +} + +func TestRefreshes(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("No /dev/null on windows") + } + ctx := context.Background() + c := NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute) + thing := []int64{0} + tick := func() (int64, error) { + thing[0] += 1 + return thing[0], nil + } + first, err := c.Load(ctx, tick) + assert.NoError(t, err) + + second, err := c.Load(ctx, tick) + assert.NoError(t, err) + assert.Equal(t, first, second) + + c.validity = 0 + third, err := c.Load(ctx, tick) + assert.NoError(t, err) + assert.NotEqual(t, first, third) +} + +func TestSupportOfflineSystem(t *testing.T) { + c := NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute) + thing := []int64{0} + tick := func() (int64, error) { + thing[0] += 1 + return thing[0], nil + } + ctx := context.Background() + first, err := c.Load(ctx, tick) + assert.NoError(t, err) + + tick = func() (int64, error) { + return 0, &url.Error{ + Op: "X", + URL: "Y", + Err: errors.New("nope"), + } + } + + c.validity = 0 + + // offline during refresh + 
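The TTL-and-offline behavior exercised by these tests comes from the `localcache` package introduced just above. As a rough usage sketch (the cache directory and key below are made up, and the snippet assumes it is compiled inside this repository's module), a caller wraps an expensive lookup in `Load`; the result is persisted as `<dir>/<name>.json` with a `refreshed_at` timestamp and a `data` payload, and a refresh that fails with a `*url.Error` falls back to the previously cached value:

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/databricks/cli/cmd/labs/localcache"
)

func main() {
	ctx := context.Background()
	// Hypothetical directory and key; the labs code uses the project cache dir
	// and keys such as "databrickslabs-blueprint-releases".
	cache := localcache.NewLocalCache[[]string]("/tmp/labs-cache-example", "example-releases", 1*time.Hour)
	tags, err := cache.Load(ctx, func() ([]string, error) {
		// Stand-in for a remote call; the result is written to
		// /tmp/labs-cache-example/example-releases.json as {"refreshed_at": ..., "data": [...]}.
		return []string{"v1.2.3", "v1.2.2"}, nil
	})
	fmt.Println(tags, err)
}
```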
third, err := c.Load(ctx, tick) + assert.NoError(t, err) + assert.Equal(t, first, third) + + // fully offline + c = NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute) + zero, err := c.Load(ctx, tick) + assert.NoError(t, err) + assert.Equal(t, int64(0), zero) +} + +func TestFolderDisappears(t *testing.T) { + if runtime.GOOS == "windows" { + t.Skip("No /dev/null on windows") + } + c := NewLocalCache[int64]("/dev/null", "time", 1*time.Minute) + tick := func() (int64, error) { + now := time.Now().UnixNano() + t.Log("TICKS") + return now, nil + } + ctx := context.Background() + _, err := c.Load(ctx, tick) + assert.Error(t, err) +} + +func TestRefreshFails(t *testing.T) { + c := NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute) + tick := func() (int64, error) { + return 0, fmt.Errorf("nope") + } + ctx := context.Background() + _, err := c.Load(ctx, tick) + assert.EqualError(t, err, "refresh: nope") +} + +func TestWrongType(t *testing.T) { + c := NewLocalCache[chan int](t.TempDir(), "x", 1*time.Minute) + ctx := context.Background() + _, err := c.Load(ctx, func() (chan int, error) { + return make(chan int), nil + }) + assert.EqualError(t, err, "json marshal: json: unsupported type: chan int") +} diff --git a/cmd/labs/project/command_test.go b/cmd/labs/project/command_test.go new file mode 100644 index 00000000..20021879 --- /dev/null +++ b/cmd/labs/project/command_test.go @@ -0,0 +1,69 @@ +package project_test + +import ( + "context" + "path/filepath" + "testing" + "time" + + "github.com/databricks/cli/internal" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/python" + "github.com/databricks/databricks-sdk-go" + "github.com/stretchr/testify/assert" +) + +type echoOut struct { + Command string `json:"command"` + Flags map[string]string `json:"flags"` + Env map[string]string `json:"env"` +} + +func devEnvContext(t *testing.T) context.Context { + ctx := context.Background() + ctx = env.WithUserHomeDir(ctx, "testdata/installed-in-home") + py, _ := python.DetectExecutable(ctx) + py, _ = filepath.Abs(py) + ctx = env.Set(ctx, "PYTHON_BIN", py) + return ctx +} + +func TestRunningBlueprintEcho(t *testing.T) { + ctx := devEnvContext(t) + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "blueprint", "echo") + var out echoOut + r.RunAndParseJSON(&out) + assert.Equal(t, "echo", out.Command) + assert.Equal(t, "something", out.Flags["first"]) + assert.Equal(t, "https://accounts.cloud.databricks.com", out.Env["DATABRICKS_HOST"]) + assert.Equal(t, "cde", out.Env["DATABRICKS_ACCOUNT_ID"]) +} + +func TestRunningBlueprintEchoProfileWrongOverride(t *testing.T) { + ctx := devEnvContext(t) + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "blueprint", "echo", "--profile", "workspace-profile") + _, _, err := r.Run() + assert.ErrorIs(t, err, databricks.ErrNotAccountClient) +} + +func TestRunningCommand(t *testing.T) { + ctx := devEnvContext(t) + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "blueprint", "foo") + r.WithStdin() + defer r.CloseStdin() + + r.RunBackground() + r.WaitForTextPrinted("What is your name?", 5*time.Second) + r.SendText("Dude\n") + r.WaitForTextPrinted("Hello, Dude!", 5*time.Second) +} + +func TestRenderingTable(t *testing.T) { + ctx := devEnvContext(t) + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "blueprint", "table") + r.RunAndExpectOutput(` + Key Value + First Second + Third Fourth + `) +} diff --git a/cmd/labs/project/entrypoint.go b/cmd/labs/project/entrypoint.go new file mode 100644 index 
00000000..fedd70a4 --- /dev/null +++ b/cmd/labs/project/entrypoint.go @@ -0,0 +1,250 @@ +package project + +import ( + "context" + "errors" + "fmt" + "io/fs" + "net/http" + "os" + "path/filepath" + "strings" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/internal/build" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/config" + "github.com/spf13/cobra" +) + +type Entrypoint struct { + *Project + + RequireRunningCluster bool `yaml:"require_running_cluster,omitempty"` + IsUnauthenticated bool `yaml:"is_unauthenticated,omitempty"` + IsAccountLevel bool `yaml:"is_account_level,omitempty"` + IsBundleAware bool `yaml:"is_bundle_aware,omitempty"` +} + +var ErrNoLoginConfig = errors.New("no login configuration found") +var ErrMissingClusterID = errors.New("missing a cluster compatible with Databricks Connect") +var ErrMissingWarehouseID = errors.New("missing a SQL warehouse") +var ErrNotInTTY = errors.New("not in an interactive terminal") + +func (e *Entrypoint) NeedsCluster() bool { + if e.Installer == nil { + return false + } + if e.Installer.RequireDatabricksConnect && e.Installer.MinRuntimeVersion == "" { + e.Installer.MinRuntimeVersion = "13.1" + } + return e.Installer.MinRuntimeVersion != "" +} + +func (e *Entrypoint) NeedsWarehouse() bool { + if e.Installer == nil { + return false + } + return len(e.Installer.WarehouseTypes) != 0 +} + +func (e *Entrypoint) Prepare(cmd *cobra.Command) (map[string]string, error) { + ctx := cmd.Context() + libDir := e.EffectiveLibDir(ctx) + environment := map[string]string{ + "DATABRICKS_CLI_VERSION": build.GetInfo().Version, + "DATABRICKS_LABS_CACHE_DIR": e.CacheDir(ctx), + "DATABRICKS_LABS_CONFIG_DIR": e.ConfigDir(ctx), + "DATABRICKS_LABS_STATE_DIR": e.StateDir(ctx), + "DATABRICKS_LABS_LIB_DIR": libDir, + } + if e.IsPythonProject(ctx) { + e.preparePython(ctx, environment) + } + cfg, err := e.validLogin(cmd) + if err != nil { + return nil, fmt.Errorf("login: %w", err) + } + // cleanup auth profile and config file location, + // so that we don't confuse SDKs + cfg.Profile = "" + cfg.ConfigFile = "" + varNames := []string{} + for k, v := range e.environmentFromConfig(cfg) { + environment[k] = v + varNames = append(varNames, k) + } + if e.NeedsCluster() && e.RequireRunningCluster { + err = e.ensureRunningCluster(ctx, cfg) + if err != nil { + return nil, fmt.Errorf("running cluster: %w", err) + } + } + log.Debugf(ctx, "Passing down environment variables: %s", strings.Join(varNames, ", ")) + return environment, nil +} + +func (e *Entrypoint) preparePython(ctx context.Context, environment map[string]string) { + venv := e.virtualEnvPath(ctx) + environment["PATH"] = e.joinPaths(filepath.Join(venv, "bin"), env.Get(ctx, "PATH")) + + // PYTHONPATH extends the standard lookup locations for module files. It follows the same structure as + // the shell's PATH, where you specify one or more directory paths separated by the appropriate delimiter + // (such as colons for Unix or semicolons for Windows). If a directory listed in PYTHONPATH doesn't exist, + // it is disregarded without any notifications. + // + // Beyond regular directories, individual entries in PYTHONPATH can point to zipfiles that contain pure + // Python modules in either their source or compiled forms. It's important to note that extension modules + // cannot be imported from zipfiles. 
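To make the effect of this comment concrete, `preparePython` ends up exporting roughly the following environment: the virtualenv's `bin` directory is prepended to `PATH`, and the library checkout plus its `src` folder become `PYTHONPATH`, joined with the OS-specific list separator. A standalone sketch (the blueprint paths are illustrative and mirror the layout shown later in the installer test):

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strings"
)

// joinPaths mirrors Entrypoint.joinPaths: PATH-style joining with the
// platform's list separator (":" on Unix, ";" on Windows).
func joinPaths(paths ...string) string {
	return strings.Join(paths, string(os.PathListSeparator))
}

func main() {
	home, _ := os.UserHomeDir()
	libDir := filepath.Join(home, ".databricks", "labs", "blueprint", "lib")
	venv := filepath.Join(home, ".databricks", "labs", "blueprint", "state", "venv")

	fmt.Println("PATH      =", joinPaths(filepath.Join(venv, "bin"), os.Getenv("PATH")))
	fmt.Println("PYTHONPATH=", joinPaths(libDir, filepath.Join(libDir, "src")))
}
```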
+ // + // The initial search path varies depending on your installation but typically commences with the + // prefix/lib/pythonversion path (as indicated by PYTHONHOME). This default path is always included + // in PYTHONPATH. + // + // An extra directory can be included at the beginning of the search path, coming before PYTHONPATH, + // as explained in the Interface options section. You can control the search path from within a Python + // script using the sys.path variable. + // + // Here we are also supporting the "src" layout for python projects. + // + // See https://docs.python.org/3/using/cmdline.html#envvar-PYTHONPATH + libDir := e.EffectiveLibDir(ctx) + // The intention for every install is to be sandboxed - not dependent on anything else than Python binary. + // Having ability to override PYTHONPATH in the mix will break this assumption. Need strong evidence that + // this is really needed. + environment["PYTHONPATH"] = e.joinPaths(libDir, filepath.Join(libDir, "src")) +} + +func (e *Entrypoint) ensureRunningCluster(ctx context.Context, cfg *config.Config) error { + feedback := cmdio.Spinner(ctx) + defer close(feedback) + w, err := databricks.NewWorkspaceClient((*databricks.Config)(cfg)) + if err != nil { + return fmt.Errorf("workspace client: %w", err) + } + // TODO: add in-progress callback to EnsureClusterIsRunning() in SDK + feedback <- "Ensuring the cluster is running..." + err = w.Clusters.EnsureClusterIsRunning(ctx, cfg.ClusterID) + if err != nil { + return fmt.Errorf("ensure running: %w", err) + } + return nil +} + +func (e *Entrypoint) joinPaths(paths ...string) string { + return strings.Join(paths, string(os.PathListSeparator)) +} + +func (e *Entrypoint) envAwareConfig(ctx context.Context) *config.Config { + return &config.Config{ + ConfigFile: filepath.Join(env.UserHomeDir(ctx), ".databrickscfg"), + Loaders: []config.Loader{ + env.NewConfigLoader(ctx), + config.ConfigAttributes, + config.ConfigFile, + }, + } +} + +func (e *Entrypoint) envAwareConfigWithProfile(ctx context.Context, profile string) *config.Config { + cfg := e.envAwareConfig(ctx) + cfg.Profile = profile + return cfg +} + +func (e *Entrypoint) getLoginConfig(cmd *cobra.Command) (*loginConfig, *config.Config, error) { + ctx := cmd.Context() + // it's okay for this config file not to exist, because some environments, + // like GitHub Actions, don't (need) to have it. There's a small downside of + // a warning log message from within Go SDK. 
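The "no config file" situation is detected by `isAuthConfigured` further down in this file: it builds a config from ambient sources and checks whether a throwaway request can be authenticated. A simplified standalone sketch of that probe (simplification: the real code uses an env-aware config that also points at `~/.databrickscfg`; a bare `config.Config` here resolves only environment variables such as `DATABRICKS_HOST` and `DATABRICKS_TOKEN`):

```go
package main

import (
	"context"
	"fmt"
	"net/http"

	"github.com/databricks/databricks-sdk-go/config"
)

func main() {
	cfg := &config.Config{}
	req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, "https://example.com", nil)
	if err != nil {
		panic(err)
	}
	// Authenticate resolves credentials and decorates the request; an error
	// means the labs command still needs an explicit login configuration.
	if err := cfg.Authenticate(req); err != nil {
		fmt.Println("no ambient auth, login config required:", err)
		return
	}
	fmt.Println("authenticated purely from the environment")
}
```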
+ profileOverride := e.profileOverride(cmd) + if profileOverride != "" { + log.Infof(ctx, "Overriding login profile: %s", profileOverride) + return &loginConfig{}, e.envAwareConfigWithProfile(ctx, profileOverride), nil + } + lc, err := e.loadLoginConfig(ctx) + isNoLoginConfig := errors.Is(err, fs.ErrNotExist) + defaultConfig := e.envAwareConfig(ctx) + if isNoLoginConfig && !e.IsBundleAware && e.isAuthConfigured(defaultConfig) { + log.Debugf(ctx, "Login is configured via environment variables") + return &loginConfig{}, defaultConfig, nil + } + if isNoLoginConfig && !e.IsBundleAware { + return nil, nil, ErrNoLoginConfig + } + if !isNoLoginConfig && err != nil { + return nil, nil, fmt.Errorf("load: %w", err) + } + if e.IsAccountLevel { + log.Debugf(ctx, "Using account-level login profile: %s", lc.AccountProfile) + return lc, e.envAwareConfigWithProfile(ctx, lc.AccountProfile), nil + } + if e.IsBundleAware { + err = root.TryConfigureBundle(cmd, []string{}) + if err != nil { + return nil, nil, fmt.Errorf("bundle: %w", err) + } + if b := bundle.GetOrNil(cmd.Context()); b != nil { + log.Infof(ctx, "Using login configuration from Databricks Asset Bundle") + return &loginConfig{}, b.WorkspaceClient().Config, nil + } + } + log.Debugf(ctx, "Using workspace-level login profile: %s", lc.WorkspaceProfile) + return lc, e.envAwareConfigWithProfile(ctx, lc.WorkspaceProfile), nil +} + +func (e *Entrypoint) validLogin(cmd *cobra.Command) (*config.Config, error) { + if e.IsUnauthenticated { + return &config.Config{}, nil + } + lc, cfg, err := e.getLoginConfig(cmd) + if err != nil { + return nil, fmt.Errorf("login config: %w", err) + } + err = cfg.EnsureResolved() + if err != nil { + return nil, err + } + // merge ~/.databrickscfg and ~/.databricks/labs/x/config/login.json when + // it comes to project-specific configuration + if e.NeedsCluster() && cfg.ClusterID == "" { + cfg.ClusterID = lc.ClusterID + } + if e.NeedsWarehouse() && cfg.WarehouseID == "" { + cfg.WarehouseID = lc.WarehouseID + } + isACC := cfg.IsAccountClient() + if e.IsAccountLevel && !isACC { + return nil, databricks.ErrNotAccountClient + } + if e.NeedsCluster() && !isACC && cfg.ClusterID == "" { + return nil, ErrMissingClusterID + } + if e.NeedsWarehouse() && !isACC && cfg.WarehouseID == "" { + return nil, ErrMissingWarehouseID + } + return cfg, nil +} + +func (e *Entrypoint) environmentFromConfig(cfg *config.Config) map[string]string { + env := map[string]string{} + for _, a := range config.ConfigAttributes { + if a.IsZero(cfg) { + continue + } + for _, ev := range a.EnvVars { + env[ev] = a.GetString(cfg) + } + } + return env +} + +func (e *Entrypoint) isAuthConfigured(cfg *config.Config) bool { + r := &http.Request{Header: http.Header{}} + err := cfg.Authenticate(r.WithContext(context.Background())) + return err == nil +} diff --git a/cmd/labs/project/fetcher.go b/cmd/labs/project/fetcher.go new file mode 100644 index 00000000..b677bcd9 --- /dev/null +++ b/cmd/labs/project/fetcher.go @@ -0,0 +1,141 @@ +package project + +import ( + "context" + "errors" + "fmt" + "os" + "path/filepath" + "strings" + + "github.com/databricks/cli/cmd/labs/github" + "github.com/databricks/cli/libs/log" + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +type installable interface { + Install(ctx context.Context) error +} + +type devInstallation struct { + *Project + *cobra.Command +} + +func (d *devInstallation) Install(ctx context.Context) error { + if d.Installer == nil { + return nil + } + _, err := d.Installer.validLogin(d.Command) + if 
errors.Is(err, ErrNoLoginConfig) { + cfg := d.Installer.envAwareConfig(ctx) + lc := &loginConfig{Entrypoint: d.Installer.Entrypoint} + _, err = lc.askWorkspace(ctx, cfg) + if err != nil { + return fmt.Errorf("ask for workspace: %w", err) + } + err = lc.askAccountProfile(ctx, cfg) + if err != nil { + return fmt.Errorf("ask for account: %w", err) + } + err = lc.EnsureFoldersExist(ctx) + if err != nil { + return fmt.Errorf("folders: %w", err) + } + err = lc.save(ctx) + if err != nil { + return fmt.Errorf("save: %w", err) + } + } + return d.Installer.runHook(d.Command) +} + +func NewInstaller(cmd *cobra.Command, name string) (installable, error) { + if name == "." { + wd, err := os.Getwd() + if err != nil { + return nil, fmt.Errorf("working directory: %w", err) + } + prj, err := Load(cmd.Context(), filepath.Join(wd, "labs.yml")) + if err != nil { + return nil, fmt.Errorf("load: %w", err) + } + cmd.PrintErrln(color.YellowString("Installing %s in development mode from %s", prj.Name, wd)) + return &devInstallation{ + Project: prj, + Command: cmd, + }, nil + } + name, version, ok := strings.Cut(name, "@") + if !ok { + version = "latest" + } + f := &fetcher{name} + version, err := f.checkReleasedVersions(cmd, version) + if err != nil { + return nil, fmt.Errorf("version: %w", err) + } + prj, err := f.loadRemoteProjectDefinition(cmd, version) + if err != nil { + return nil, fmt.Errorf("remote: %w", err) + } + return &installer{ + Project: prj, + version: version, + cmd: cmd, + }, nil +} + +func NewUpgrader(cmd *cobra.Command, name string) (*installer, error) { + f := &fetcher{name} + version, err := f.checkReleasedVersions(cmd, "latest") + if err != nil { + return nil, fmt.Errorf("version: %w", err) + } + prj, err := f.loadRemoteProjectDefinition(cmd, version) + if err != nil { + return nil, fmt.Errorf("remote: %w", err) + } + prj.folder = PathInLabs(cmd.Context(), name) + return &installer{ + Project: prj, + version: version, + cmd: cmd, + }, nil +} + +type fetcher struct { + name string +} + +func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (string, error) { + ctx := cmd.Context() + cacheDir := PathInLabs(ctx, f.name, "cache") + // `databricks labs isntall X` doesn't know which exact version to fetch, so first + // we fetch all versions and then pick the latest one dynamically. 
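In other words, `databricks labs install <name>@<version>` pins a specific release, while a bare name resolves to the newest cached release. A small sketch of that resolution (the release list below is fabricated; GitHub's releases API returns entries newest-first, which is what the `versions[0]` lookup in the code after this comment relies on):

```go
package main

import (
	"fmt"
	"strings"
)

// splitSpec splits "name@version" the same way NewInstaller does,
// defaulting to "latest" when no version is given.
func splitSpec(spec string) (name, version string) {
	name, version, ok := strings.Cut(spec, "@")
	if !ok {
		version = "latest"
	}
	return name, version
}

func main() {
	releases := []string{"v0.8.1", "v0.8.0"} // newest first

	for _, spec := range []string{"ucx", "ucx@v0.8.0"} {
		name, version := splitSpec(spec)
		if version == "latest" && len(releases) > 0 {
			version = releases[0]
		}
		fmt.Printf("%s -> install %s %s\n", spec, name, version)
	}
}
```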
+ versions, err := github.NewReleaseCache("databrickslabs", f.name, cacheDir).Load(ctx) + if err != nil { + return "", fmt.Errorf("versions: %w", err) + } + for _, v := range versions { + if v.Version == version { + return version, nil + } + } + if version == "latest" && len(versions) > 0 { + log.Debugf(ctx, "Latest %s version is: %s", f.name, versions[0].Version) + return versions[0].Version, nil + } + cmd.PrintErrln(color.YellowString("[WARNING] Installing unreleased version: %s", version)) + return version, nil +} + +func (i *fetcher) loadRemoteProjectDefinition(cmd *cobra.Command, version string) (*Project, error) { + ctx := cmd.Context() + raw, err := github.ReadFileFromRef(ctx, "databrickslabs", i.name, version, "labs.yml") + if err != nil { + return nil, fmt.Errorf("read labs.yml from GitHub: %w", err) + } + return readFromBytes(ctx, raw) +} diff --git a/cmd/labs/project/helpers.go b/cmd/labs/project/helpers.go new file mode 100644 index 00000000..9117d875 --- /dev/null +++ b/cmd/labs/project/helpers.go @@ -0,0 +1,35 @@ +package project + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + + "github.com/databricks/cli/libs/env" +) + +func PathInLabs(ctx context.Context, dirs ...string) string { + homdeDir := env.UserHomeDir(ctx) + prefix := []string{homdeDir, ".databricks", "labs"} + return filepath.Join(append(prefix, dirs...)...) +} + +func tryLoadAndParseJSON[T any](jsonFile string) (*T, error) { + raw, err := os.ReadFile(jsonFile) + if errors.Is(err, fs.ErrNotExist) { + return nil, err + } + if err != nil { + return nil, fmt.Errorf("read %s: %w", jsonFile, err) + } + var v T + err = json.Unmarshal(raw, &v) + if err != nil { + return nil, fmt.Errorf("parse %s: %w", jsonFile, err) + } + return &v, nil +} diff --git a/cmd/labs/project/init_test.go b/cmd/labs/project/init_test.go new file mode 100644 index 00000000..959381f5 --- /dev/null +++ b/cmd/labs/project/init_test.go @@ -0,0 +1,13 @@ +package project + +import ( + "log/slog" + "os" +) + +func init() { + slog.SetDefault(slog.New( + slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{ + Level: slog.LevelDebug, + }))) +} diff --git a/cmd/labs/project/installed.go b/cmd/labs/project/installed.go new file mode 100644 index 00000000..77fee544 --- /dev/null +++ b/cmd/labs/project/installed.go @@ -0,0 +1,58 @@ +package project + +import ( + "context" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + + "github.com/databricks/cli/folders" + "github.com/databricks/cli/libs/log" +) + +func projectInDevMode(ctx context.Context) (*Project, error) { + cwd, err := os.Getwd() + if err != nil { + return nil, err + } + folder, err := folders.FindDirWithLeaf(cwd, "labs.yml") + if err != nil { + return nil, err + } + log.Debugf(ctx, "Found project under development in: %s", cwd) + return Load(ctx, filepath.Join(folder, "labs.yml")) +} + +func Installed(ctx context.Context) (projects []*Project, err error) { + labsDir, err := os.ReadDir(PathInLabs(ctx)) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + return nil, err + } + projectDev, err := projectInDevMode(ctx) + if err != nil && !errors.Is(err, fs.ErrNotExist) { + return nil, err + } + if err == nil { + projects = append(projects, projectDev) + } + for _, v := range labsDir { + if !v.IsDir() { + continue + } + if projectDev != nil && v.Name() == projectDev.Name { + continue + } + labsYml := PathInLabs(ctx, v.Name(), "lib", "labs.yml") + prj, err := Load(ctx, labsYml) + if errors.Is(err, fs.ErrNotExist) { + continue + } + if err != nil { + 
return nil, fmt.Errorf("%s: %w", v.Name(), err) + } + projects = append(projects, prj) + } + return projects, nil +} diff --git a/cmd/labs/project/installed_test.go b/cmd/labs/project/installed_test.go new file mode 100644 index 00000000..e837692d --- /dev/null +++ b/cmd/labs/project/installed_test.go @@ -0,0 +1,19 @@ +package project + +import ( + "context" + "testing" + + "github.com/databricks/cli/libs/env" + "github.com/stretchr/testify/assert" +) + +func TestInstalled(t *testing.T) { + ctx := context.Background() + ctx = env.WithUserHomeDir(ctx, "testdata/installed-in-home") + + all, err := Installed(ctx) + assert.NoError(t, err) + assert.Len(t, all, 1) + assert.Equal(t, "blueprint", all[0].Name) +} diff --git a/cmd/labs/project/installer.go b/cmd/labs/project/installer.go new file mode 100644 index 00000000..2e09ed37 --- /dev/null +++ b/cmd/labs/project/installer.go @@ -0,0 +1,286 @@ +package project + +import ( + "bytes" + "context" + "errors" + "fmt" + "os" + "strings" + + "github.com/databricks/cli/cmd/labs/github" + "github.com/databricks/cli/cmd/labs/unpack" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/databrickscfg" + "github.com/databricks/cli/libs/databrickscfg/cfgpickers" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/process" + "github.com/databricks/cli/libs/python" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/sql" + "github.com/fatih/color" + "github.com/spf13/cobra" +) + +const ownerRWXworldRX = 0o755 + +type whTypes []sql.EndpointInfoWarehouseType + +type hook struct { + *Entrypoint `yaml:",inline"` + Script string `yaml:"script"` + RequireDatabricksConnect bool `yaml:"require_databricks_connect,omitempty"` + MinRuntimeVersion string `yaml:"min_runtime_version,omitempty"` + WarehouseTypes whTypes `yaml:"warehouse_types,omitempty"` +} + +func (h *hook) RequireRunningCluster() bool { + if h.Entrypoint == nil { + return false + } + return h.Entrypoint.RequireRunningCluster +} + +func (h *hook) HasPython() bool { + return strings.HasSuffix(h.Script, ".py") +} + +func (h *hook) runHook(cmd *cobra.Command) error { + if h.Script == "" { + return nil + } + ctx := cmd.Context() + envs, err := h.Prepare(cmd) + if err != nil { + return fmt.Errorf("prepare: %w", err) + } + libDir := h.EffectiveLibDir(ctx) + args := []string{} + if strings.HasSuffix(h.Script, ".py") { + args = append(args, h.virtualEnvPython(ctx)) + } + return process.Forwarded(ctx, + append(args, h.Script), + cmd.InOrStdin(), + cmd.OutOrStdout(), + cmd.ErrOrStderr(), + process.WithDir(libDir), + process.WithEnvs(envs)) +} + +type installer struct { + *Project + version string + + // command instance is used for: + // - auth profile flag override + // - standard input, output, and error streams + cmd *cobra.Command +} + +func (i *installer) Install(ctx context.Context) error { + err := i.EnsureFoldersExist(ctx) + if err != nil { + return fmt.Errorf("folders: %w", err) + } + i.folder = PathInLabs(ctx, i.Name) + w, err := i.login(ctx) + if err != nil && errors.Is(err, databrickscfg.ErrNoConfiguration) { + cfg := i.Installer.envAwareConfig(ctx) + w, err = databricks.NewWorkspaceClient((*databricks.Config)(cfg)) + if err != nil { + return fmt.Errorf("no ~/.databrickscfg: %w", err) + } + } else if err != nil { + return fmt.Errorf("login: %w", err) + } + err = i.downloadLibrary(ctx) + if err != nil { + return fmt.Errorf("lib: %w", err) + } + err = 
i.setupPythonVirtualEnvironment(ctx, w) + if err != nil { + return fmt.Errorf("python: %w", err) + } + err = i.recordVersion(ctx) + if err != nil { + return fmt.Errorf("record version: %w", err) + } + // TODO: failing install hook for "clean installations" (not upgrages) + // should trigger removal of the project, otherwise users end up with + // misconfigured CLIs + err = i.runInstallHook(ctx) + if err != nil { + return fmt.Errorf("installer: %w", err) + } + return nil +} + +func (i *installer) Upgrade(ctx context.Context) error { + err := i.downloadLibrary(ctx) + if err != nil { + return fmt.Errorf("lib: %w", err) + } + err = i.recordVersion(ctx) + if err != nil { + return fmt.Errorf("record version: %w", err) + } + err = i.runInstallHook(ctx) + if err != nil { + return fmt.Errorf("installer: %w", err) + } + return nil +} + +func (i *installer) warningf(text string, v ...any) { + i.cmd.PrintErrln(color.YellowString(text, v...)) +} + +func (i *installer) cleanupLib(ctx context.Context) error { + libDir := i.LibDir(ctx) + err := os.RemoveAll(libDir) + if err != nil { + return fmt.Errorf("remove all: %w", err) + } + return os.MkdirAll(libDir, ownerRWXworldRX) +} + +func (i *installer) recordVersion(ctx context.Context) error { + return i.writeVersionFile(ctx, i.version) +} + +func (i *installer) login(ctx context.Context) (*databricks.WorkspaceClient, error) { + if !cmdio.IsInteractive(ctx) { + log.Debugf(ctx, "Skipping workspace profile prompts in non-interactive mode") + return nil, nil + } + cfg, err := i.metaEntrypoint(ctx).validLogin(i.cmd) + if errors.Is(err, ErrNoLoginConfig) { + cfg = i.Installer.envAwareConfig(ctx) + } else if err != nil { + return nil, fmt.Errorf("valid: %w", err) + } + if !i.HasAccountLevelCommands() && cfg.IsAccountClient() { + return nil, fmt.Errorf("got account-level client, but no account-level commands") + } + lc := &loginConfig{Entrypoint: i.Installer.Entrypoint} + w, err := lc.askWorkspace(ctx, cfg) + if err != nil { + return nil, fmt.Errorf("ask for workspace: %w", err) + } + err = lc.askAccountProfile(ctx, cfg) + if err != nil { + return nil, fmt.Errorf("ask for account: %w", err) + } + err = lc.save(ctx) + if err != nil { + return nil, fmt.Errorf("save: %w", err) + } + return w, nil +} + +func (i *installer) downloadLibrary(ctx context.Context) error { + feedback := cmdio.Spinner(ctx) + defer close(feedback) + feedback <- "Cleaning up previous installation if necessary" + err := i.cleanupLib(ctx) + if err != nil { + return fmt.Errorf("cleanup: %w", err) + } + libTarget := i.LibDir(ctx) + // we may support wheels, jars, and golang binaries. 
but those are not zipballs + if i.IsZipball() { + feedback <- fmt.Sprintf("Downloading and unpacking zipball for %s", i.version) + return i.downloadAndUnpackZipball(ctx, libTarget) + } + return fmt.Errorf("we only support zipballs for now") +} + +func (i *installer) downloadAndUnpackZipball(ctx context.Context, libTarget string) error { + raw, err := github.DownloadZipball(ctx, "databrickslabs", i.Name, i.version) + if err != nil { + return fmt.Errorf("download zipball from GitHub: %w", err) + } + zipball := unpack.GitHubZipball{Reader: bytes.NewBuffer(raw)} + log.Debugf(ctx, "Unpacking zipball to: %s", libTarget) + return zipball.UnpackTo(libTarget) +} + +func (i *installer) setupPythonVirtualEnvironment(ctx context.Context, w *databricks.WorkspaceClient) error { + if !i.HasPython() { + return nil + } + feedback := cmdio.Spinner(ctx) + defer close(feedback) + feedback <- "Detecting all installed Python interpreters on the system" + pythonInterpreters, err := python.DetectInterpreters(ctx) + if err != nil { + return fmt.Errorf("detect: %w", err) + } + py, err := pythonInterpreters.AtLeast(i.MinPython) + if err != nil { + return fmt.Errorf("min version: %w", err) + } + log.Debugf(ctx, "Detected Python %s at: %s", py.Version, py.Path) + venvPath := i.virtualEnvPath(ctx) + log.Debugf(ctx, "Creating Python Virtual Environment at: %s", venvPath) + feedback <- fmt.Sprintf("Creating Virtual Environment with Python %s", py.Version) + _, err = process.Background(ctx, []string{py.Path, "-m", "venv", venvPath}) + if err != nil { + return fmt.Errorf("create venv: %w", err) + } + if i.Installer != nil && i.Installer.RequireDatabricksConnect { + feedback <- "Determining Databricks Connect version" + cluster, err := w.Clusters.Get(ctx, compute.GetClusterRequest{ + ClusterId: w.Config.ClusterID, + }) + if err != nil { + return fmt.Errorf("cluster: %w", err) + } + runtimeVersion, ok := cfgpickers.GetRuntimeVersion(*cluster) + if !ok { + return fmt.Errorf("unsupported runtime: %s", cluster.SparkVersion) + } + feedback <- fmt.Sprintf("Installing Databricks Connect v%s", runtimeVersion) + pipSpec := fmt.Sprintf("databricks-connect==%s", runtimeVersion) + err = i.installPythonDependencies(ctx, pipSpec) + if err != nil { + return fmt.Errorf("dbconnect: %w", err) + } + } + feedback <- "Installing Python library dependencies" + return i.installPythonDependencies(ctx, ".") +} + +func (i *installer) installPythonDependencies(ctx context.Context, spec string) error { + if !i.IsPythonProject(ctx) { + return nil + } + libDir := i.LibDir(ctx) + log.Debugf(ctx, "Installing Python dependencies for: %s", libDir) + // maybe we'll need to add call one of the two scripts: + // - python3 -m ensurepip --default-pip + // - curl -o https://bootstrap.pypa.io/get-pip.py | python3 + var buf bytes.Buffer + _, err := process.Background(ctx, + []string{i.virtualEnvPython(ctx), "-m", "pip", "install", spec}, + process.WithCombinedOutput(&buf), + process.WithDir(libDir)) + if err != nil { + i.warningf(buf.String()) + return fmt.Errorf("failed to install dependencies of %s", spec) + } + return nil +} + +func (i *installer) runInstallHook(ctx context.Context) error { + if i.Installer == nil { + return nil + } + if i.Installer.Script == "" { + return nil + } + log.Debugf(ctx, "Launching installer script %s in %s", i.Installer.Script, i.LibDir(ctx)) + return i.Installer.runHook(i.cmd) +} diff --git a/cmd/labs/project/installer_test.go b/cmd/labs/project/installer_test.go new file mode 100644 index 00000000..b61026f2 --- /dev/null +++ 
b/cmd/labs/project/installer_test.go @@ -0,0 +1,415 @@ +package project_test + +import ( + "archive/zip" + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "io/fs" + "net/http" + "net/http/httptest" + "os" + "path" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/databricks/cli/cmd/labs/github" + "github.com/databricks/cli/cmd/labs/project" + "github.com/databricks/cli/internal" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/python" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/iam" + "github.com/databricks/databricks-sdk-go/service/sql" + "github.com/stretchr/testify/require" +) + +const ownerRWXworldRX = 0o755 +const ownerRW = 0o600 + +func zipballFromFolder(src string) ([]byte, error) { + var buf bytes.Buffer + zw := zip.NewWriter(&buf) + rootDir := path.Base(src) // this is required to emulate github ZIP downloads + err := filepath.Walk(src, func(filePath string, info os.FileInfo, err error) error { + if err != nil { + return err + } + relpath, err := filepath.Rel(src, filePath) + if err != nil { + return err + } + relpath = path.Join(rootDir, relpath) + if info.IsDir() { + _, err = zw.Create(relpath + "/") + return err + } + file, err := os.Open(filePath) + if err != nil { + return err + } + defer file.Close() + f, err := zw.Create(relpath) + if err != nil { + return err + } + _, err = io.Copy(f, file) + return err + }) + if err != nil { + return nil, err + } + err = zw.Close() + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} + +func copyTestdata(t *testing.T, name string) string { + // TODO: refactor fs.cp command into a reusable util + tempDir := t.TempDir() + name = strings.ReplaceAll(name, "/", string(os.PathSeparator)) + err := filepath.WalkDir(name, func(path string, d fs.DirEntry, err error) error { + require.NoError(t, err) + dst := strings.TrimPrefix(path, name) + if dst == "" { + return nil + } + if d.IsDir() { + err := os.MkdirAll(filepath.Join(tempDir, dst), ownerRWXworldRX) + require.NoError(t, err) + return nil + } + in, err := os.Open(path) + require.NoError(t, err) + defer in.Close() + out, err := os.Create(filepath.Join(tempDir, dst)) + require.NoError(t, err) + defer out.Close() + _, err = io.Copy(out, in) + require.NoError(t, err) + return nil + }) + require.NoError(t, err) + return tempDir +} + +func installerContext(t *testing.T, server *httptest.Server) context.Context { + ctx := context.Background() + ctx = github.WithApiOverride(ctx, server.URL) + ctx = github.WithUserContentOverride(ctx, server.URL) + ctx = env.WithUserHomeDir(ctx, t.TempDir()) + // trick release cache to thing it went to github already + cachePath := project.PathInLabs(ctx, "blueprint", "cache") + err := os.MkdirAll(cachePath, ownerRWXworldRX) + require.NoError(t, err) + bs := []byte(`{"refreshed_at": "2033-01-01T00:00:00.92857+02:00","data": [{"tag_name": "v0.3.15"}]}`) + err = os.WriteFile(filepath.Join(cachePath, "databrickslabs-blueprint-releases.json"), bs, ownerRW) + require.NoError(t, err) + return ctx +} + +func respondWithJSON(t *testing.T, w http.ResponseWriter, v any) { + raw, err := json.Marshal(v) + if err != nil { + require.NoError(t, err) + } + w.Write(raw) +} + +type fileTree struct { + Path string + MaxDepth int +} + +func (ft fileTree) String() string { + lines := ft.listFiles(ft.Path, ft.MaxDepth) + return strings.Join(lines, "\n") +} + +func (ft fileTree) listFiles(dir string, depth int) (lines []string) { + if ft.MaxDepth > 0 && depth > 
ft.MaxDepth { + return []string{fmt.Sprintf("deeper than %d levels", ft.MaxDepth)} + } + fileInfo, err := os.ReadDir(dir) + if err != nil { + return []string{err.Error()} + } + for _, entry := range fileInfo { + lines = append(lines, fmt.Sprintf("%s%s", ft.getIndent(depth), entry.Name())) + if entry.IsDir() { + subdir := filepath.Join(dir, entry.Name()) + lines = append(lines, ft.listFiles(subdir, depth+1)...) + } + } + return lines +} + +func (ft fileTree) getIndent(depth int) string { + return "│" + strings.Repeat(" ", depth*2) + "├─ " +} + +func TestInstallerWorksForReleases(t *testing.T) { + defer func() { + if !t.Failed() { + return + } + t.Logf("file tree:\n%s", fileTree{ + Path: filepath.Dir(t.TempDir()), + }) + }() + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/databrickslabs/blueprint/v0.3.15/labs.yml" { + raw, err := os.ReadFile("testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml") + if err != nil { + panic(err) + } + w.Write(raw) + return + } + if r.URL.Path == "/repos/databrickslabs/blueprint/zipball/v0.3.15" { + raw, err := zipballFromFolder("testdata/installed-in-home/.databricks/labs/blueprint/lib") + if err != nil { + panic(err) + } + w.Header().Add("Content-Type", "application/octet-stream") + w.Write(raw) + return + } + if r.URL.Path == "/api/2.0/clusters/get" { + respondWithJSON(t, w, &compute.ClusterDetails{ + State: compute.StateRunning, + }) + return + } + t.Logf("Requested: %s", r.URL.Path) + t.FailNow() + })) + defer server.Close() + + ctx := installerContext(t, server) + + // simulate the case of GitHub Actions + ctx = env.Set(ctx, "DATABRICKS_HOST", server.URL) + ctx = env.Set(ctx, "DATABRICKS_TOKEN", "...") + ctx = env.Set(ctx, "DATABRICKS_CLUSTER_ID", "installer-cluster") + ctx = env.Set(ctx, "DATABRICKS_WAREHOUSE_ID", "installer-warehouse") + + // After the installation, we'll have approximately the following state: + // t.TempDir() + // └── 001 <------------------------------------------------- env.UserHomeDir(ctx) + // ├── .databricks + // │ └── labs + // │ └── blueprint + // │ ├── cache <------------------------------- prj.CacheDir(ctx) + // │ │ └── databrickslabs-blueprint-releases.json + // │ ├── config + // │ ├── lib <--------------------------------- prj.LibDir(ctx) + // │ │ ├── install.py + // │ │ ├── labs.yml + // │ │ ├── main.py + // │ │ └── pyproject.toml + // │ └── state <------------------------------- prj.StateDir(ctx) + // │ ├── venv <---------------------------- prj.virtualEnvPath(ctx) + // │ │ ├── bin + // │ │ │ ├── pip + // │ │ │ ├── ... + // │ │ │ ├── python -> python3.9 + // │ │ │ ├── python3 -> python3.9 <---- prj.virtualEnvPython(ctx) + // │ │ │ └── python3.9 -> (path to a detected python) + // │ │ ├── include + // │ │ ├── lib + // │ │ │ └── python3.9 + // │ │ │ └── site-packages + // │ │ │ ├── ... 
+ // │ │ │ ├── distutils-precedence.pth + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "install", "blueprint") + r.RunAndExpectOutput("setting up important infrastructure") +} + +func TestInstallerWorksForDevelopment(t *testing.T) { + defer func() { + if !t.Failed() { + return + } + t.Logf("file tree:\n%s", fileTree{ + Path: filepath.Dir(t.TempDir()), + }) + }() + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/api/2.0/clusters/list" { + respondWithJSON(t, w, compute.ListClustersResponse{ + Clusters: []compute.ClusterDetails{ + { + ClusterId: "abc-id", + ClusterName: "first shared", + DataSecurityMode: compute.DataSecurityModeUserIsolation, + SparkVersion: "12.2.x-whatever", + State: compute.StateRunning, + }, + { + ClusterId: "bcd-id", + ClusterName: "second personal", + DataSecurityMode: compute.DataSecurityModeSingleUser, + SparkVersion: "14.5.x-whatever", + State: compute.StateRunning, + SingleUserName: "serge", + }, + }, + }) + return + } + if r.URL.Path == "/api/2.0/preview/scim/v2/Me" { + respondWithJSON(t, w, iam.User{ + UserName: "serge", + }) + return + } + if r.URL.Path == "/api/2.0/clusters/spark-versions" { + respondWithJSON(t, w, compute.GetSparkVersionsResponse{ + Versions: []compute.SparkVersion{ + { + Key: "14.5.x-whatever", + Name: "14.5 (Awesome)", + }, + }, + }) + return + } + if r.URL.Path == "/api/2.0/clusters/get" { + respondWithJSON(t, w, &compute.ClusterDetails{ + State: compute.StateRunning, + }) + return + } + if r.URL.Path == "/api/2.0/sql/warehouses" { + respondWithJSON(t, w, sql.ListWarehousesResponse{ + Warehouses: []sql.EndpointInfo{ + { + Id: "efg-id", + Name: "First PRO Warehouse", + WarehouseType: sql.EndpointInfoWarehouseTypePro, + }, + }, + }) + return + } + t.Logf("Requested: %s", r.URL.Path) + t.FailNow() + })) + defer server.Close() + + wd, _ := os.Getwd() + defer os.Chdir(wd) + + devDir := copyTestdata(t, "testdata/installed-in-home/.databricks/labs/blueprint/lib") + err := os.Chdir(devDir) + require.NoError(t, err) + + ctx := installerContext(t, server) + py, _ := python.DetectExecutable(ctx) + py, _ = filepath.Abs(py) + + // development installer assumes it's in the active virtualenv + ctx = env.Set(ctx, "PYTHON_BIN", py) + + err = os.WriteFile(filepath.Join(env.UserHomeDir(ctx), ".databrickscfg"), []byte(fmt.Sprintf(` +[profile-one] +host = %s +token = ... 
+ +[acc] +host = %s +account_id = abc + `, server.URL, server.URL)), ownerRW) + require.NoError(t, err) + + // We have the following state at this point: + // t.TempDir() + // ├── 001 <------------------ $CWD, prj.EffectiveLibDir(ctx), prj.folder + // │ ├── install.py + // │ ├── labs.yml <--------- prj.IsDeveloperMode(ctx) == true + // │ ├── main.py + // │ └── pyproject.toml + // └── 002 <------------------ env.UserHomeDir(ctx) + // └── .databricks + // └── labs + // └── blueprint <--- project.PathInLabs(ctx, "blueprint"), prj.rootDir(ctx) + // └── cache <--- prj.CacheDir(ctx) + // └── databrickslabs-blueprint-releases.json + + // `databricks labs install .` means "verify this installer i'm developing does work" + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "install", ".") + r.WithStdin() + defer r.CloseStdin() + + r.RunBackground() + r.WaitForTextPrinted("setting up important infrastructure", 5*time.Second) +} + +func TestUpgraderWorksForReleases(t *testing.T) { + defer func() { + if !t.Failed() { + return + } + t.Logf("file tree:\n%s", fileTree{ + Path: filepath.Dir(t.TempDir()), + }) + }() + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.URL.Path == "/databrickslabs/blueprint/v0.4.0/labs.yml" { + raw, err := os.ReadFile("testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml") + if err != nil { + panic(err) + } + w.Write(raw) + return + } + if r.URL.Path == "/repos/databrickslabs/blueprint/zipball/v0.4.0" { + raw, err := zipballFromFolder("testdata/installed-in-home/.databricks/labs/blueprint/lib") + if err != nil { + panic(err) + } + w.Header().Add("Content-Type", "application/octet-stream") + w.Write(raw) + return + } + if r.URL.Path == "/api/2.0/clusters/get" { + respondWithJSON(t, w, &compute.ClusterDetails{ + State: compute.StateRunning, + }) + return + } + t.Logf("Requested: %s", r.URL.Path) + t.FailNow() + })) + defer server.Close() + + ctx := installerContext(t, server) + + newHome := copyTestdata(t, "testdata/installed-in-home") + ctx = env.WithUserHomeDir(ctx, newHome) + + py, _ := python.DetectExecutable(ctx) + py, _ = filepath.Abs(py) + ctx = env.Set(ctx, "PYTHON_BIN", py) + + cachePath := project.PathInLabs(ctx, "blueprint", "cache") + bs := []byte(`{"refreshed_at": "2033-01-01T00:00:00.92857+02:00","data": [{"tag_name": "v0.4.0"}]}`) + err := os.WriteFile(filepath.Join(cachePath, "databrickslabs-blueprint-releases.json"), bs, ownerRW) + require.NoError(t, err) + + // simulate the case of GitHub Actions + ctx = env.Set(ctx, "DATABRICKS_HOST", server.URL) + ctx = env.Set(ctx, "DATABRICKS_TOKEN", "...") + ctx = env.Set(ctx, "DATABRICKS_CLUSTER_ID", "installer-cluster") + ctx = env.Set(ctx, "DATABRICKS_WAREHOUSE_ID", "installer-warehouse") + + r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "upgrade", "blueprint") + r.RunAndExpectOutput("setting up important infrastructure") +} diff --git a/cmd/labs/project/login.go b/cmd/labs/project/login.go new file mode 100644 index 00000000..dd235064 --- /dev/null +++ b/cmd/labs/project/login.go @@ -0,0 +1,117 @@ +package project + +import ( + "context" + "encoding/json" + "fmt" + "os" + + "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/databrickscfg/cfgpickers" + "github.com/databricks/cli/libs/log" + "github.com/databricks/databricks-sdk-go" + "github.com/databricks/databricks-sdk-go/config" +) + +type loginConfig struct { + *Entrypoint `json:"-"` + WorkspaceProfile string 
`json:"workspace_profile,omitempty"` + AccountProfile string `json:"account_profile,omitempty"` + ClusterID string `json:"cluster_id,omitempty"` + WarehouseID string `json:"warehouse_id,omitempty"` +} + +func (lc *loginConfig) askWorkspace(ctx context.Context, cfg *config.Config) (*databricks.WorkspaceClient, error) { + if cfg.IsAccountClient() { + return nil, nil + } + err := lc.askWorkspaceProfile(ctx, cfg) + if err != nil { + return nil, fmt.Errorf("profile: %w", err) + } + w, err := databricks.NewWorkspaceClient((*databricks.Config)(cfg)) + if err != nil { + return nil, fmt.Errorf("client: %w", err) + } + err = lc.askCluster(ctx, w) + if err != nil { + return nil, fmt.Errorf("cluster: %w", err) + } + err = lc.askWarehouse(ctx, w) + if err != nil { + return nil, fmt.Errorf("warehouse: %w", err) + } + return w, nil +} + +func (lc *loginConfig) askWorkspaceProfile(ctx context.Context, cfg *config.Config) (err error) { + if cfg.Profile != "" { + lc.WorkspaceProfile = cfg.Profile + return + } + if !cmdio.IsInteractive(ctx) { + return ErrNotInTTY + } + lc.WorkspaceProfile, err = root.AskForWorkspaceProfile(ctx) + cfg.Profile = lc.WorkspaceProfile + return +} + +func (lc *loginConfig) askCluster(ctx context.Context, w *databricks.WorkspaceClient) (err error) { + if !lc.NeedsCluster() { + return + } + if w.Config.ClusterID != "" { + lc.ClusterID = w.Config.ClusterID + return + } + if !cmdio.IsInteractive(ctx) { + return ErrNotInTTY + } + clusterID, err := cfgpickers.AskForCluster(ctx, w, + cfgpickers.WithDatabricksConnect(lc.Installer.MinRuntimeVersion)) + if err != nil { + return fmt.Errorf("select: %w", err) + } + w.Config.ClusterID = clusterID + lc.ClusterID = clusterID + return +} + +func (lc *loginConfig) askWarehouse(ctx context.Context, w *databricks.WorkspaceClient) (err error) { + if !lc.NeedsWarehouse() { + return + } + if w.Config.WarehouseID != "" { + lc.WarehouseID = w.Config.WarehouseID + return + } + if !cmdio.IsInteractive(ctx) { + return ErrNotInTTY + } + lc.WarehouseID, err = cfgpickers.AskForWarehouse(ctx, w, + cfgpickers.WithWarehouseTypes(lc.Installer.WarehouseTypes...)) + return +} + +func (lc *loginConfig) askAccountProfile(ctx context.Context, cfg *config.Config) (err error) { + if !lc.HasAccountLevelCommands() { + return nil + } + if !cmdio.IsInteractive(ctx) { + return ErrNotInTTY + } + lc.AccountProfile, err = root.AskForAccountProfile(ctx) + return +} + +func (lc *loginConfig) save(ctx context.Context) error { + authFile := lc.loginFile(ctx) + raw, err := json.MarshalIndent(lc, "", " ") + if err != nil { + return err + } + log.Debugf(ctx, "Writing auth configuration to: %s", authFile) + return os.WriteFile(authFile, raw, ownerRW) +} diff --git a/cmd/labs/project/project.go b/cmd/labs/project/project.go new file mode 100644 index 00000000..6adf9a3c --- /dev/null +++ b/cmd/labs/project/project.go @@ -0,0 +1,352 @@ +package project + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "runtime" + "strings" + "time" + + "github.com/databricks/cli/cmd/labs/github" + "github.com/databricks/cli/libs/env" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/python" + "github.com/databricks/databricks-sdk-go/logger" + "github.com/fatih/color" + "gopkg.in/yaml.v3" + + "github.com/spf13/cobra" +) + +const ownerRW = 0o600 + +func Load(ctx context.Context, labsYml string) (*Project, error) { + raw, err := os.ReadFile(labsYml) + if err != nil { + return nil, fmt.Errorf("read labs.yml: %w", err) + } + project, err := readFromBytes(ctx, 
raw) + if err != nil { + return nil, err + } + project.folder = filepath.Dir(labsYml) + return project, nil +} + +func readFromBytes(ctx context.Context, labsYmlRaw []byte) (*Project, error) { + var project Project + err := yaml.Unmarshal(labsYmlRaw, &project) + if err != nil { + return nil, fmt.Errorf("parse labs.yml: %w", err) + } + e := (&project).metaEntrypoint(ctx) + if project.Installer != nil { + project.Installer.Entrypoint = e + } + if project.Uninstaller != nil { + project.Uninstaller.Entrypoint = e + } + return &project, nil +} + +type Project struct { + SpecVersion int `yaml:"$version"` + + Name string `yaml:"name"` + Description string `yaml:"description"` + Installer *hook `yaml:"install,omitempty"` + Uninstaller *hook `yaml:"uninstall,omitempty"` + Main string `yaml:"entrypoint"` + MinPython string `yaml:"min_python"` + Commands []*proxy `yaml:"commands,omitempty"` + + folder string +} + +func (p *Project) IsZipball() bool { + // the simplest way of running the project - download ZIP file from github + return true +} + +func (p *Project) HasPython() bool { + if strings.HasSuffix(p.Main, ".py") { + return true + } + if p.Installer != nil && p.Installer.HasPython() { + return true + } + if p.Uninstaller != nil && p.Uninstaller.HasPython() { + return true + } + return p.MinPython != "" +} + +func (p *Project) metaEntrypoint(ctx context.Context) *Entrypoint { + return &Entrypoint{ + Project: p, + RequireRunningCluster: p.requireRunningCluster(), + } +} + +func (p *Project) requireRunningCluster() bool { + if p.Installer != nil && p.Installer.RequireRunningCluster() { + return true + } + for _, v := range p.Commands { + if v.RequireRunningCluster { + return true + } + } + return false +} + +func (p *Project) fileExists(name string) bool { + _, err := os.Stat(name) + return err == nil +} + +func (p *Project) projectFilePath(ctx context.Context, name string) string { + return filepath.Join(p.EffectiveLibDir(ctx), name) +} + +func (p *Project) IsPythonProject(ctx context.Context) bool { + if p.fileExists(p.projectFilePath(ctx, "setup.py")) { + return true + } + if p.fileExists(p.projectFilePath(ctx, "pyproject.toml")) { + return true + } + return false +} + +func (p *Project) IsDeveloperMode(ctx context.Context) bool { + return p.folder != "" && !strings.HasPrefix(p.LibDir(ctx), p.folder) +} + +func (p *Project) HasFolder() bool { + return p.folder != "" +} + +func (p *Project) HasAccountLevelCommands() bool { + for _, v := range p.Commands { + if v.IsAccountLevel { + return true + } + } + return false +} + +func (p *Project) IsBundleAware() bool { + for _, v := range p.Commands { + if v.IsBundleAware { + return true + } + } + return false +} + +func (p *Project) Register(parent *cobra.Command) { + group := &cobra.Command{ + Use: p.Name, + Short: p.Description, + GroupID: "labs", + } + parent.AddCommand(group) + for _, cp := range p.Commands { + cp.register(group) + cp.Entrypoint.Project = p + } +} + +func (p *Project) rootDir(ctx context.Context) string { + return PathInLabs(ctx, p.Name) +} + +func (p *Project) CacheDir(ctx context.Context) string { + return filepath.Join(p.rootDir(ctx), "cache") +} + +func (p *Project) ConfigDir(ctx context.Context) string { + return filepath.Join(p.rootDir(ctx), "config") +} + +func (p *Project) LibDir(ctx context.Context) string { + return filepath.Join(p.rootDir(ctx), "lib") +} + +func (p *Project) EffectiveLibDir(ctx context.Context) string { + if p.IsDeveloperMode(ctx) { + // developer is working on a local checkout, that is not inside of 
installed root + return p.folder + } + return p.LibDir(ctx) +} + +func (p *Project) StateDir(ctx context.Context) string { + return filepath.Join(p.rootDir(ctx), "state") +} + +func (p *Project) EnsureFoldersExist(ctx context.Context) error { + dirs := []string{p.CacheDir(ctx), p.ConfigDir(ctx), p.LibDir(ctx), p.StateDir(ctx)} + for _, v := range dirs { + err := os.MkdirAll(v, ownerRWXworldRX) + if err != nil { + return fmt.Errorf("folder %s: %w", v, err) + } + } + return nil +} + +func (p *Project) Uninstall(cmd *cobra.Command) error { + if p.Uninstaller != nil { + err := p.Uninstaller.runHook(cmd) + if err != nil { + return fmt.Errorf("uninstall hook: %w", err) + } + } + ctx := cmd.Context() + log.Infof(ctx, "Removing project: %s", p.Name) + return os.RemoveAll(p.rootDir(ctx)) +} + +func (p *Project) virtualEnvPath(ctx context.Context) string { + if p.IsDeveloperMode(ctx) { + // When a virtual environment has been activated, the VIRTUAL_ENV environment variable + // is set to the path of the environment. Since explicitly activating a virtual environment + // is not required to use it, VIRTUAL_ENV cannot be relied upon to determine whether a virtual + // environment is being used. + // + // See https://docs.python.org/3/library/venv.html#how-venvs-work + activatedVenv := env.Get(ctx, "VIRTUAL_ENV") + if activatedVenv != "" { + logger.Debugf(ctx, "(development mode) using active virtual environment from: %s", activatedVenv) + return activatedVenv + } + nonActivatedVenv, err := python.DetectVirtualEnvPath(p.EffectiveLibDir(ctx)) + if err == nil { + logger.Debugf(ctx, "(development mode) using virtual environment from: %s", nonActivatedVenv) + return nonActivatedVenv + } + } + // by default, we pick Virtual Environment from DATABRICKS_LABS_STATE_DIR + return filepath.Join(p.StateDir(ctx), "venv") +} + +func (p *Project) virtualEnvPython(ctx context.Context) string { + overridePython := env.Get(ctx, "PYTHON_BIN") + if overridePython != "" { + return overridePython + } + if runtime.GOOS == "windows" { + return filepath.Join(p.virtualEnvPath(ctx), "Scripts", "python.exe") + } + return filepath.Join(p.virtualEnvPath(ctx), "bin", "python3") +} + +func (p *Project) loginFile(ctx context.Context) string { + if p.IsDeveloperMode(ctx) { + // developers may not want to pollute the state in + // ~/.databricks/labs/X/config while the version is not yet + // released + return p.projectFilePath(ctx, ".databricks-login.json") + } + return filepath.Join(p.ConfigDir(ctx), "login.json") +} + +func (p *Project) loadLoginConfig(ctx context.Context) (*loginConfig, error) { + loginFile := p.loginFile(ctx) + log.Debugf(ctx, "Loading login configuration from: %s", loginFile) + lc, err := tryLoadAndParseJSON[loginConfig](loginFile) + if err != nil { + return nil, fmt.Errorf("try load: %w", err) + } + lc.Entrypoint = p.metaEntrypoint(ctx) + return lc, nil +} + +func (p *Project) versionFile(ctx context.Context) string { + return filepath.Join(p.StateDir(ctx), "version.json") +} + +func (p *Project) InstalledVersion(ctx context.Context) (*version, error) { + if p.IsDeveloperMode(ctx) { + return &version{ + Version: "*", + Date: time.Now(), + }, nil + } + versionFile := p.versionFile(ctx) + log.Debugf(ctx, "Loading installed version info from: %s", versionFile) + return tryLoadAndParseJSON[version](versionFile) +} + +func (p *Project) writeVersionFile(ctx context.Context, ver string) error { + versionFile := p.versionFile(ctx) + raw, err := json.Marshal(version{ + Version: ver, + Date: time.Now(), + }) + if err != nil 
{ + return err + } + log.Debugf(ctx, "Writing installed version info to: %s", versionFile) + return os.WriteFile(versionFile, raw, ownerRW) +} + +// checkUpdates is called before every command of an installed project, +// giving users hints when they need to update their installations. +func (p *Project) checkUpdates(cmd *cobra.Command) error { + ctx := cmd.Context() + if p.IsDeveloperMode(ctx) { + // skipping update check for projects in developer mode, that + // might not be installed yet + return nil + } + r := github.NewReleaseCache("databrickslabs", p.Name, p.CacheDir(ctx)) + versions, err := r.Load(ctx) + if err != nil { + return err + } + installed, err := p.InstalledVersion(ctx) + if err != nil { + return err + } + latest := versions[0] + if installed.Version == latest.Version { + return nil + } + ago := time.Since(latest.PublishedAt) + msg := "[UPGRADE ADVISED] Newer %s version was released %s ago. Please run `databricks labs upgrade %s` to upgrade: %s -> %s" + cmd.PrintErrln(color.YellowString(msg, p.Name, p.timeAgo(ago), p.Name, installed.Version, latest.Version)) + return nil +} + +func (p *Project) timeAgo(dur time.Duration) string { + days := int(dur.Hours()) / 24 + hours := int(dur.Hours()) % 24 + minutes := int(dur.Minutes()) % 60 + if dur < time.Minute { + return "minute" + } else if dur < time.Hour { + return fmt.Sprintf("%d minutes", minutes) + } else if dur < (24 * time.Hour) { + return fmt.Sprintf("%d hours", hours) + } + return fmt.Sprintf("%d days", days) +} + +func (p *Project) profileOverride(cmd *cobra.Command) string { + profileFlag := cmd.Flag("profile") + if profileFlag == nil { + return "" + } + return profileFlag.Value.String() +} + +type version struct { + Version string `json:"version"` + Date time.Time `json:"date"` +} diff --git a/cmd/labs/project/project_test.go b/cmd/labs/project/project_test.go new file mode 100644 index 00000000..79e69bad --- /dev/null +++ b/cmd/labs/project/project_test.go @@ -0,0 +1,22 @@ +package project + +import ( + "context" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +func assertEqualPaths(t *testing.T, expected, actual string) { + expected = strings.ReplaceAll(expected, "/", string(os.PathSeparator)) + assert.Equal(t, expected, actual) +} + +func TestLoad(t *testing.T) { + ctx := context.Background() + prj, err := Load(ctx, "testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml") + assert.NoError(t, err) + assertEqualPaths(t, "testdata/installed-in-home/.databricks/labs/blueprint/lib", prj.folder) +} diff --git a/cmd/labs/project/proxy.go b/cmd/labs/project/proxy.go new file mode 100644 index 00000000..ae7df286 --- /dev/null +++ b/cmd/labs/project/proxy.go @@ -0,0 +1,146 @@ +package project + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io/fs" + "path/filepath" + "strings" + + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/libs/log" + "github.com/databricks/cli/libs/process" + "github.com/spf13/cobra" + "github.com/spf13/pflag" +) + +type proxy struct { + Entrypoint `yaml:",inline"` + Name string `yaml:"name"` + Description string `yaml:"description"` + TableTemplate string `yaml:"table_template,omitempty"` + Flags []flag `yaml:"flags,omitempty"` +} + +func (cp *proxy) register(parent *cobra.Command) { + cmd := &cobra.Command{ + Use: cp.Name, + Short: cp.Description, + RunE: cp.runE, + } + parent.AddCommand(cmd) + flags := cmd.Flags() + for _, flag := range cp.Flags { + flag.register(flags) + } +} + +func (cp *proxy) runE(cmd *cobra.Command, _ 
[]string) error { + err := cp.checkUpdates(cmd) + if err != nil { + return err + } + args, err := cp.commandInput(cmd) + if err != nil { + return err + } + envs, err := cp.Prepare(cmd) + if err != nil { + return fmt.Errorf("entrypoint: %w", err) + } + ctx := cmd.Context() + log.Debugf(ctx, "Forwarding subprocess: %s", strings.Join(args, " ")) + if cp.TableTemplate != "" { + return cp.renderJsonAsTable(cmd, args, envs) + } + err = process.Forwarded(ctx, args, + cmd.InOrStdin(), + cmd.OutOrStdout(), + cmd.ErrOrStderr(), + process.WithEnvs(envs)) + if errors.Is(err, fs.ErrNotExist) && cp.IsPythonProject(ctx) { + msg := "cannot find Python %s. Please re-run: databricks labs install %s" + return fmt.Errorf(msg, cp.MinPython, cp.Name) + } + return err +} + +// [EXPERIMENTAL] this interface contract may change in the future. +// See https://github.com/databricks/cli/issues/994 +func (cp *proxy) renderJsonAsTable(cmd *cobra.Command, args []string, envs map[string]string) error { + var buf bytes.Buffer + ctx := cmd.Context() + err := process.Forwarded(ctx, args, + cmd.InOrStdin(), + &buf, + cmd.ErrOrStderr(), + process.WithEnvs(envs)) + if err != nil { + return err + } + var anyVal any + err = json.Unmarshal(buf.Bytes(), &anyVal) + if err != nil { + return err + } + // IntelliJ eagerly replaces tabs with spaces, even though we're not asking for it + fixedTemplate := strings.ReplaceAll(cp.TableTemplate, "\\t", "\t") + return cmdio.RenderWithTemplate(ctx, anyVal, fixedTemplate) +} + +func (cp *proxy) commandInput(cmd *cobra.Command) ([]string, error) { + flags := cmd.Flags() + commandInput := struct { + Command string `json:"command"` + Flags map[string]any `json:"flags"` + OutputType string `json:"output_type"` + }{ + Command: cp.Name, + Flags: map[string]any{}, + } + for _, f := range cp.Flags { + v, err := f.get(flags) + if err != nil { + return nil, fmt.Errorf("get %s flag: %w", f.Name, err) + } + commandInput.Flags[f.Name] = v + } + logLevelFlag := flags.Lookup("log-level") + if logLevelFlag != nil { + commandInput.Flags["log_level"] = logLevelFlag.Value.String() + } + args := []string{} + ctx := cmd.Context() + if cp.IsPythonProject(ctx) { + args = append(args, cp.virtualEnvPython(ctx)) + libDir := cp.EffectiveLibDir(cmd.Context()) + entrypoint := filepath.Join(libDir, cp.Main) + args = append(args, entrypoint) + } + raw, err := json.Marshal(commandInput) + if err != nil { + return nil, fmt.Errorf("command input: %w", err) + } + args = append(args, string(raw)) + return args, nil +} + +type flag struct { + Name string `yaml:"name"` + Description string `yaml:"description"` + Default any `yaml:"default,omitempty"` +} + +func (f *flag) register(pf *pflag.FlagSet) { + var dflt string + if f.Default != nil { + dflt = fmt.Sprint(f.Default) + } + pf.String(f.Name, dflt, f.Description) +} + +func (f *flag) get(pf *pflag.FlagSet) (any, error) { + return pf.GetString(f.Name) +} diff --git a/cmd/labs/project/schema.json b/cmd/labs/project/schema.json new file mode 100644 index 00000000..a779b15e --- /dev/null +++ b/cmd/labs/project/schema.json @@ -0,0 +1,126 @@ +{ + "id": "https://raw.githubusercontent.com/databricks/cli/feat/labs/cmd/labs/project/schema.json#", + "$schema": "http://json-schema.org/draft-04/schema", + "definitions": { + "entrypoint": { + "type": "object", + "properties": { + "require_running_cluster": { + "type": "boolean", + "default": false + }, + "is_unauthenticated": { + "type": "boolean", + "default": false + }, + "is_account_level": { + "type": "boolean", + "default": false + }, + 
"is_bundle_aware": { + "type": "boolean", + "default": false + } + } + }, + "hook": { + "type": "object", + "$ref": "#/definitions/entrypoint", + "unevaluatedProperties": true, + "properties": { + "script": { + "type": "string", + "pattern": "^[A-Za-z0-9_-/\\.]+$" + }, + "min_runtime_version": { + "type": "string", + "pattern": "^[0-9]+.[0-9]+$" + }, + "require_databricks_connect": { + "type": "boolean", + "default": false + }, + "warehouse_types": { + "enum": [ "PRO", "CLASSIC", "TYPE_UNSPECIFIED" ] + } + } + }, + "alphanum": { + "type": "string", + "pattern": "^[a-z0-9-]$" + }, + "command": { + "type": "object", + "$ref": "#/definitions/entrypoint", + "unevaluatedProperties": true, + "required": ["name", "description"], + "properties": { + "name": { + "$ref": "#/definitions/alphanum" + }, + "description": { + "type": "string" + }, + "table_template": { + "type": "string" + }, + "flags": { + "$ref": "#/definitions/flag" + } + } + }, + "flag": { + "type": "object", + "required": ["name", "description"], + "properties": { + "name": { + "$ref": "#/definitions/alphanum" + }, + "description": { + "type": "string" + }, + "default": {} + } + } + }, + "type": "object", + "additionalProperties": false, + "required": ["name", "description", "entrypoint"], + "properties": { + "$version": { + "type": "integer", + "default": 1 + }, + "name": { + "$ref": "#/definitions/alphanum", + "description": "Name of the project" + }, + "description": { + "type": "string", + "description": "Short description of the project" + }, + "entrypoint": { + "type": "string", + "description": "Script that routes subcommands" + }, + "min_python": { + "type": "string", + "pattern": "^3.[0-9]+$", + "description": "Minimal Python version required" + }, + "install": { + "$ref": "#/definitions/hook", + "description": "Installation configuration" + }, + "uninstall": { + "$ref": "#/definitions/hook" + }, + "commands": { + "type": "array", + "description": "Exposed commands", + "items": { + "$ref": "#/definitions/command" + } + } + } +} diff --git a/cmd/labs/project/testdata/.gitignore b/cmd/labs/project/testdata/.gitignore new file mode 100644 index 00000000..bd1711fd --- /dev/null +++ b/cmd/labs/project/testdata/.gitignore @@ -0,0 +1 @@ +!.databricks diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/cache/databrickslabs-blueprint-releases.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/cache/databrickslabs-blueprint-releases.json new file mode 100644 index 00000000..87651864 --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/cache/databrickslabs-blueprint-releases.json @@ -0,0 +1,8 @@ +{ + "refreshed_at": "2033-01-01T00:00:00.92857+02:00", + "data": [ + { + "tag_name": "v0.3.15" + } + ] + } diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/config/login.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/config/login.json new file mode 100644 index 00000000..7b611ba3 --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/config/login.json @@ -0,0 +1,4 @@ +{ + "workspace_profile": "workspace-profile", + "account_profile": "account-profile" +} diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py new file mode 100644 index 00000000..6873257d --- /dev/null +++ 
b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/install.py @@ -0,0 +1 @@ +print(f'setting up important infrastructure') diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml new file mode 100644 index 00000000..0ac4bf82 --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml @@ -0,0 +1,33 @@ +--- +version: 1 +name: blueprint +description: Blueprint Project +install: + min_runtime_version: 13.1 + require_running_cluster: true + warehouse_types: + - PRO + script: install.py +entrypoint: main.py +min_python: 3.9 +commands: + - name: echo + is_account_level: true + description: non-interactive echo + flags: + - name: first + default: something + description: first flag description + - name: foo + description: foo command + flags: + - name: first + description: first flag description + - name: second + description: second flag description + - name: table + description: something that renders a table + table_template: | + Key Value + {{range .records}}{{.key}} {{.value}} + {{end}} diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py new file mode 100644 index 00000000..769ee73e --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/main.py @@ -0,0 +1,27 @@ +import os, sys, json + +payload = json.loads(sys.argv[1]) + +if 'echo' == payload['command']: + json.dump({ + 'command': payload['command'], + 'flags': payload['flags'], + 'env': {k:v for k,v in os.environ.items()} + }, sys.stdout) + sys.exit(0) + +if 'table' == payload['command']: + sys.stderr.write("some intermediate info\n") + json.dump({'records': [ + {'key': 'First', 'value': 'Second'}, + {'key': 'Third', 'value': 'Fourth'}, + ]}, sys.stdout) + sys.exit(0) + +print(f'host is {os.environ["DATABRICKS_HOST"]}') + +print(f'[{payload["command"]}] command flags are {payload["flags"]}') + +answer = input('What is your name? 
') + +print(f'Hello, {answer}!') diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/pyproject.toml b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/pyproject.toml new file mode 100644 index 00000000..d33ab1fb --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/lib/pyproject.toml @@ -0,0 +1,11 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "blueprint" +version = "0.3.15" +description = 'Databricks Labs Blueprint' +requires-python = ">=3.9" +classifiers = ["Programming Language :: Python"] +dependencies = [] diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/other-state-file.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/other-state-file.json new file mode 100644 index 00000000..0967ef42 --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/other-state-file.json @@ -0,0 +1 @@ +{} diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/venv/pyvenv.cfg b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/venv/pyvenv.cfg new file mode 100644 index 00000000..e69de29b diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/version.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/version.json new file mode 100644 index 00000000..4bcae155 --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/blueprint/state/version.json @@ -0,0 +1,4 @@ +{ + "version": "v0.3.15", + "date": "2023-10-24T15:04:05+01:00" +} diff --git a/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-repositories.json b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-repositories.json new file mode 100644 index 00000000..896ebecc --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databricks/labs/databrickslabs-repositories.json @@ -0,0 +1,37 @@ +{ + "refreshed_at": "2033-01-01T00:00:00.92857+02:00", + "data": [ + { + "name": "blueprint", + "description": "Sample project", + "language": "Python", + "default_branch": "main", + "stargazers_count": 100500, + "fork": false, + "archived": false, + "topics": [], + "html_url": "https://github.com/databrickslabs/blueprint", + "clone_url": "https://github.com/databrickslabs/blueprint.git", + "ssh_url": "git@github.com:databrickslabs/blueprint.git", + "license": { + "name": "Other" + } + }, + { + "name": "ucx", + "description": "Unity Catalog Migrations", + "language": "Python", + "default_branch": "main", + "stargazers_count": 100500, + "fork": false, + "archived": false, + "topics": [], + "html_url": "https://github.com/databrickslabs/ucx", + "clone_url": "https://github.com/databrickslabs/ucx.git", + "ssh_url": "git@github.com:databrickslabs/ucx.git", + "license": { + "name": "Other" + } + } + ] +} diff --git a/cmd/labs/project/testdata/installed-in-home/.databrickscfg b/cmd/labs/project/testdata/installed-in-home/.databrickscfg new file mode 100644 index 00000000..ec1bf7bd --- /dev/null +++ b/cmd/labs/project/testdata/installed-in-home/.databrickscfg @@ -0,0 +1,9 @@ +[workspace-profile] +host = https://abc +token = bcd +cluster_id = cde +warehouse_id = def + +[account-profile] +host = https://accounts.cloud.databricks.com +account_id = cde diff --git a/cmd/labs/show.go b/cmd/labs/show.go new file 
mode 100644 index 00000000..fc9d175c --- /dev/null +++ b/cmd/labs/show.go @@ -0,0 +1,57 @@ +package labs + +import ( + "fmt" + + "github.com/databricks/cli/cmd/labs/project" + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" +) + +func newShowCommand() *cobra.Command { + return &cobra.Command{ + Use: "show NAME", + Args: cobra.ExactArgs(1), + Short: "Shows information about the project", + Annotations: map[string]string{ + "template": cmdio.Heredoc(` + Name: {{.name}} + Description: {{.description}} + Python: {{.is_python}} + + Folders: + - lib: {{.lib_dir}} + - cache: {{.cache_dir}} + - config: {{.config_dir}} + + `), + }, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + installed, err := project.Installed(ctx) + if err != nil { + return err + } + if len(installed) == 0 { + return fmt.Errorf("no projects found") + } + name := args[0] + for _, v := range installed { + isDev := name == "." && v.IsDeveloperMode(ctx) + isMatch := name == v.Name + if !(isDev || isMatch) { + continue + } + return cmdio.Render(ctx, map[string]any{ + "name": v.Name, + "description": v.Description, + "cache_dir": v.CacheDir(ctx), + "config_dir": v.ConfigDir(ctx), + "lib_dir": v.EffectiveLibDir(ctx), + "is_python": v.IsPythonProject(ctx), + }) + } + return nil + }, + } +} diff --git a/cmd/labs/uninstall.go b/cmd/labs/uninstall.go new file mode 100644 index 00000000..b2c83fff --- /dev/null +++ b/cmd/labs/uninstall.go @@ -0,0 +1,39 @@ +package labs + +import ( + "fmt" + + "github.com/databricks/cli/cmd/labs/project" + "github.com/spf13/cobra" +) + +func newUninstallCommand() *cobra.Command { + return &cobra.Command{ + Use: "uninstall NAME", + Args: cobra.ExactArgs(1), + Short: "Uninstalls project", + ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) { + var names []string + installed, _ := project.Installed(cmd.Context()) + for _, v := range installed { + names = append(names, v.Name) + } + return names, cobra.ShellCompDirectiveNoFileComp + }, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + installed, err := project.Installed(ctx) + if err != nil { + return err + } + name := args[0] + for _, prj := range installed { + if prj.Name != name { + continue + } + return prj.Uninstall(cmd) + } + return fmt.Errorf("not found: %s", name) + }, + } +} diff --git a/cmd/labs/unpack/zipball.go b/cmd/labs/unpack/zipball.go new file mode 100644 index 00000000..d2cfa8c9 --- /dev/null +++ b/cmd/labs/unpack/zipball.go @@ -0,0 +1,64 @@ +package unpack + +import ( + "archive/zip" + "bytes" + "fmt" + "io" + "os" + "path/filepath" + "strings" +) + +const ownerRWXworldRX = 0o755 + +type GitHubZipball struct { + io.Reader +} + +func (v GitHubZipball) UnpackTo(libTarget string) error { + raw, err := io.ReadAll(v) + if err != nil { + return err + } + zipReader, err := zip.NewReader(bytes.NewReader(raw), int64(len(raw))) + if err != nil { + return fmt.Errorf("zip: %w", err) + } + // GitHub packages entire repo contents into a top-level folder, e.g. 
databrickslabs-ucx-2800c6b + rootDirInZIP := zipReader.File[0].FileHeader.Name + for _, zf := range zipReader.File { + if zf.Name == rootDirInZIP { + continue + } + normalizedName := strings.TrimPrefix(zf.Name, rootDirInZIP) + targetName := filepath.Join(libTarget, normalizedName) + if zf.FileInfo().IsDir() { + err = os.MkdirAll(targetName, ownerRWXworldRX) + if err != nil { + return fmt.Errorf("mkdir %s: %w", normalizedName, err) + } + continue + } + err = v.extractFile(zf, targetName) + if err != nil { + return fmt.Errorf("extract %s: %w", zf.Name, err) + } + } + return nil +} + +func (v GitHubZipball) extractFile(zf *zip.File, targetName string) error { + reader, err := zf.Open() + if err != nil { + return fmt.Errorf("source: %w", err) + } + defer reader.Close() + writer, err := os.OpenFile(targetName, os.O_CREATE|os.O_RDWR, zf.Mode()) + if err != nil { + return fmt.Errorf("target: %w", err) + } + defer writer.Close() + _, err = io.Copy(writer, reader) + return err +} diff --git a/cmd/labs/upgrade.go b/cmd/labs/upgrade.go new file mode 100644 index 00000000..88b7bc92 --- /dev/null +++ b/cmd/labs/upgrade.go @@ -0,0 +1,21 @@ +package labs + +import ( + "github.com/databricks/cli/cmd/labs/project" + "github.com/spf13/cobra" +) + +func newUpgradeCommand() *cobra.Command { + return &cobra.Command{ + Use: "upgrade NAME", + Args: cobra.ExactArgs(1), + Short: "Upgrades project", + RunE: func(cmd *cobra.Command, args []string) error { + inst, err := project.NewUpgrader(cmd, args[0]) + if err != nil { + return err + } + return inst.Upgrade(cmd.Context()) + }, + } +}
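
Note (illustrative, not part of the patch): the core flow added here is `labs.yml` → `project.Load` → `Project.Register`, which mounts one cobra sub-command per `commands` entry. Below is a minimal sketch of that wiring using the test fixture's `labs.yml`; the standalone `main` wrapper is hypothetical, since the real CLI performs this registration through the `databricks labs` command group:

```go
package main

import (
	"context"
	"log"

	"github.com/databricks/cli/cmd/labs/project"
	"github.com/spf13/cobra"
)

func main() {
	ctx := context.Background()
	// Path taken from the test fixtures in this patch; any checkout containing a labs.yml works.
	prj, err := project.Load(ctx, "testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml")
	if err != nil {
		log.Fatal(err)
	}
	root := &cobra.Command{Use: "databricks"}
	// Project.Register assigns GroupID "labs" to the project command,
	// so the parent must declare that group first.
	root.AddGroup(&cobra.Group{ID: "labs", Title: "Databricks Labs"})
	prj.Register(root)
	if err := root.ExecuteContext(ctx); err != nil {
		log.Fatal(err)
	}
}
```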
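
Also illustrative: the contract between `proxy.commandInput` and a project entrypoint is a single JSON argument (the fixture's `main.py` reads it via `json.loads(sys.argv[1])`). The sketch below shows the shape of that payload for the fixture's `echo` command; the field names mirror the struct in `proxy.go`, while the flag values are assumptions for illustration only:

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Field names mirror commandInput in proxy.go; values are illustrative only.
	payload := map[string]any{
		"command": "echo",
		"flags": map[string]any{
			"first":     "something", // default declared for `echo` in the fixture labs.yml
			"log_level": "disabled",  // copied from the CLI's --log-level flag when that flag is present
		},
		"output_type": "",
	}
	raw, _ := json.Marshal(payload)
	// The proxy appends this JSON as the last argument, roughly:
	//   <state>/venv/bin/python3 <lib dir>/main.py '<payload>'
	fmt.Println(string(raw))
}
```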
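
Finally, the `unpack` package is self-contained and can be exercised on its own; a minimal sketch, assuming a zipball has already been saved to a hypothetical local file:

```go
package main

import (
	"bytes"
	"log"
	"os"

	"github.com/databricks/cli/cmd/labs/unpack"
)

func main() {
	// Hypothetical input: a GitHub zipball downloaded beforehand.
	raw, err := os.ReadFile("blueprint.zip")
	if err != nil {
		log.Fatal(err)
	}
	// UnpackTo strips the top-level <org>-<repo>-<sha> folder and
	// extracts the remaining entries under the target directory.
	err = unpack.GitHubZipball{Reader: bytes.NewReader(raw)}.UnpackTo("/tmp/blueprint-lib")
	if err != nil {
		log.Fatal(err)
	}
}
```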