Serge Smertin 2023-11-17 13:47:37 +01:00 committed by GitHub
parent 489d6fa1b8
commit 1b7558cd7d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
48 changed files with 3178 additions and 0 deletions

View File

@ -10,6 +10,7 @@ import (
"github.com/databricks/cli/cmd/bundle" "github.com/databricks/cli/cmd/bundle"
"github.com/databricks/cli/cmd/configure" "github.com/databricks/cli/cmd/configure"
"github.com/databricks/cli/cmd/fs" "github.com/databricks/cli/cmd/fs"
"github.com/databricks/cli/cmd/labs"
"github.com/databricks/cli/cmd/root" "github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/cmd/sync" "github.com/databricks/cli/cmd/sync"
"github.com/databricks/cli/cmd/version" "github.com/databricks/cli/cmd/version"
@ -70,6 +71,7 @@ func New(ctx context.Context) *cobra.Command {
cli.AddCommand(bundle.New()) cli.AddCommand(bundle.New())
cli.AddCommand(configure.New()) cli.AddCommand(configure.New())
cli.AddCommand(fs.New()) cli.AddCommand(fs.New())
cli.AddCommand(labs.New(ctx))
cli.AddCommand(sync.New()) cli.AddCommand(sync.New())
cli.AddCommand(version.New()) cli.AddCommand(version.New())

1
cmd/labs/CODEOWNERS Normal file
View File

@ -0,0 +1 @@
* @nfx

33
cmd/labs/clear_cache.go Normal file
View File

@ -0,0 +1,33 @@
package labs
import (
"log/slog"
"os"
"github.com/databricks/cli/cmd/labs/project"
"github.com/databricks/cli/libs/log"
"github.com/spf13/cobra"
)
// newClearCacheCommand wires `databricks labs clear-cache`, which wipes the
// repository index and the per-project cache folders of every installed
// labs project.
func newClearCacheCommand() *cobra.Command {
	return &cobra.Command{
		Use:   "clear-cache",
		Short: "Clears cache entries from everywhere relevant",
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := cmd.Context()
			installed, err := project.Installed(ctx)
			if err != nil {
				return err
			}
			// best effort: a missing index file is not an error
			_ = os.Remove(project.PathInLabs(ctx, "databrickslabs-repositories.json"))
			logger := log.GetLogger(ctx)
			for _, p := range installed {
				logger.Info("clearing labs project cache", slog.String("name", p.Name))
				_ = os.RemoveAll(p.CacheDir(ctx))
				// recreating empty cache folder for downstream apps to work normally
				_ = p.EnsureFoldersExist(ctx)
			}
			return nil
		},
	}
}

66
cmd/labs/github/github.go Normal file
View File

@ -0,0 +1,66 @@
package github
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"strings"
"github.com/databricks/cli/libs/log"
)
// Base URLs for the GitHub REST API and for raw file content; both can be
// redirected via the context overrides below (used by unit tests).
const gitHubAPI = "https://api.github.com"
const gitHubUserContent = "https://raw.githubusercontent.com"

// Placeholders to use as unique keys in context.Context.
var apiOverride int
var userContentOverride int

// WithApiOverride returns a context that rewrites the gitHubAPI prefix of
// request URLs to override (typically an httptest server URL).
func WithApiOverride(ctx context.Context, override string) context.Context {
	return context.WithValue(ctx, &apiOverride, override)
}

// WithUserContentOverride returns a context that rewrites the
// gitHubUserContent prefix of request URLs to override.
func WithUserContentOverride(ctx context.Context, override string) context.Context {
	return context.WithValue(ctx, &userContentOverride, override)
}

// ErrNotFound is returned when GitHub responds with HTTP 404.
var ErrNotFound = errors.New("not found")
// getBytes performs an HTTP request against GitHub and returns the raw
// response body. The gitHubAPI and gitHubUserContent URL prefixes are
// rewritten when test overrides were installed on the context.
//
// Returns ErrNotFound for a 404 response and a generic error for any other
// status >= 400.
func getBytes(ctx context.Context, method, url string, body io.Reader) ([]byte, error) {
	if ao, ok := ctx.Value(&apiOverride).(string); ok {
		url = strings.Replace(url, gitHubAPI, ao, 1)
	}
	if uco, ok := ctx.Value(&userContentOverride).(string); ok {
		url = strings.Replace(url, gitHubUserContent, uco, 1)
	}
	log.Tracef(ctx, "%s %s", method, url)
	// bug fix: the method argument was previously ignored and "GET" was
	// hardcoded, silently breaking any non-GET caller.
	req, err := http.NewRequestWithContext(ctx, method, url, body)
	if err != nil {
		return nil, err
	}
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	// bug fix: close the body on every path (it was previously leaked on
	// 404 and other error statuses), so the connection can be reused.
	defer res.Body.Close()
	if res.StatusCode == http.StatusNotFound {
		return nil, ErrNotFound
	}
	if res.StatusCode >= 400 {
		return nil, fmt.Errorf("github request failed: %s", res.Status)
	}
	return io.ReadAll(res.Body)
}
// httpGetAndUnmarshal performs a GET against url and decodes the JSON
// response body into response.
func httpGetAndUnmarshal(ctx context.Context, url string, response any) error {
	payload, err := getBytes(ctx, "GET", url, nil)
	if err == nil {
		err = json.Unmarshal(payload, response)
	}
	return err
}

20
cmd/labs/github/ref.go Normal file
View File

@ -0,0 +1,20 @@
package github
import (
"context"
"fmt"
"github.com/databricks/cli/libs/log"
)
// ReadFileFromRef fetches a single file from org/repo at ref (branch, tag,
// or commit SHA) via raw.githubusercontent.com.
func ReadFileFromRef(ctx context.Context, org, repo, ref, file string) ([]byte, error) {
	log.Debugf(ctx, "Reading %s@%s from %s/%s", file, ref, org, repo)
	url := fmt.Sprintf("%s/%s/%s/%s/%s", gitHubUserContent, org, repo, ref, file)
	return getBytes(ctx, "GET", url, nil)
}

// DownloadZipball downloads the source zipball of org/repo at ref through
// the GitHub REST API.
func DownloadZipball(ctx context.Context, org, repo, ref string) ([]byte, error) {
	log.Debugf(ctx, "Downloading zipball for %s from %s/%s", ref, org, repo)
	zipballURL := fmt.Sprintf("%s/repos/%s/%s/zipball/%s", gitHubAPI, org, repo, ref)
	return getBytes(ctx, "GET", zipballURL, nil)
}

View File

@ -0,0 +1,48 @@
package github
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
)
// TestFileFromRef verifies that ReadFileFromRef hits the expected
// raw-content path when the user-content override is installed.
func TestFileFromRef(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/databrickslabs/ucx/main/README.md" {
			w.Write([]byte(`abc`))
			return
		}
		// any other path means the URL construction regressed
		t.Logf("Requested: %s", r.URL.Path)
		panic("stub required")
	}))
	defer server.Close()
	ctx := context.Background()
	ctx = WithUserContentOverride(ctx, server.URL)
	raw, err := ReadFileFromRef(ctx, "databrickslabs", "ucx", "main", "README.md")
	assert.NoError(t, err)
	assert.Equal(t, []byte("abc"), raw)
}

// TestDownloadZipball verifies that DownloadZipball hits the expected REST
// API path when the API override is installed.
func TestDownloadZipball(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/repos/databrickslabs/ucx/zipball/main" {
			w.Write([]byte(`abc`))
			return
		}
		t.Logf("Requested: %s", r.URL.Path)
		panic("stub required")
	}))
	defer server.Close()
	ctx := context.Background()
	ctx = WithApiOverride(ctx, server.URL)
	raw, err := DownloadZipball(ctx, "databrickslabs", "ucx", "main")
	assert.NoError(t, err)
	assert.Equal(t, []byte("abc"), raw)
}

View File

@ -0,0 +1,61 @@
package github
import (
"context"
"fmt"
"time"
"github.com/databricks/cli/cmd/labs/localcache"
"github.com/databricks/cli/libs/log"
)
// cacheTTL is how long a cached release listing is trusted before the
// GitHub API is queried again.
const cacheTTL = 1 * time.Hour

// NewReleaseCache creates a release cache for a repository in the GitHub org.
// Caller has to provide different cache directories for different repositories.
func NewReleaseCache(org, repo, cacheDir string) *ReleaseCache {
	pattern := fmt.Sprintf("%s-%s-releases", org, repo)
	return &ReleaseCache{
		cache: localcache.NewLocalCache[Versions](cacheDir, pattern, cacheTTL),
		Org:   org,
		Repo:  repo,
	}
}

// ReleaseCache lazily loads the GitHub releases of Org/Repo and keeps them
// in a local JSON file for cacheTTL.
type ReleaseCache struct {
	cache localcache.LocalCache[Versions]
	Org   string
	Repo  string
}

// Load returns the cached release list, refreshing it from the GitHub API
// when the cache file is missing or stale.
func (r *ReleaseCache) Load(ctx context.Context) (Versions, error) {
	return r.cache.Load(ctx, func() (Versions, error) {
		return getVersions(ctx, r.Org, r.Repo)
	})
}
// getVersions fetches all releases of org/repo straight from the GitHub API.
// It is deliberately unexported: lookups are meant to go through the cache.
func getVersions(ctx context.Context, org, repo string) (Versions, error) {
	log.Debugf(ctx, "Fetching latest releases for %s/%s from GitHub API", org, repo)
	url := fmt.Sprintf("%s/repos/%s/%s/releases", gitHubAPI, org, repo)
	var out Versions
	err := httpGetAndUnmarshal(ctx, url, &out)
	return out, err
}
// ghAsset is the subset of a GitHub release asset payload consumed here.
type ghAsset struct {
	Name               string `json:"name"`
	ContentType        string `json:"content_type"`
	Size               int    `json:"size"`
	BrowserDownloadURL string `json:"browser_download_url"`
}

// Release is the subset of the GitHub release payload used by the labs
// installer. Version carries the git tag name (e.g. "v0.3.15").
type Release struct {
	Version     string    `json:"tag_name"`
	CreatedAt   time.Time `json:"created_at"`
	PublishedAt time.Time `json:"published_at"`
	ZipballURL  string    `json:"zipball_url"`
	Assets      []ghAsset `json:"assets"`
}

// Versions is a list of releases in API order. NOTE(review): the installer
// treats element 0 as the latest release — confirm the API guarantees
// newest-first ordering.
type Versions []Release

View File

@ -0,0 +1,34 @@
package github
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
)
// TestLoadsReleasesForCLI verifies that releases are fetched through the
// API override and that a second Load within the TTL succeeds.
func TestLoadsReleasesForCLI(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/repos/databricks/cli/releases" {
			w.Write([]byte(`[{"tag_name": "v1.2.3"}, {"tag_name": "v1.2.2"}]`))
			return
		}
		t.Logf("Requested: %s", r.URL.Path)
		panic("stub required")
	}))
	defer server.Close()
	ctx := context.Background()
	ctx = WithApiOverride(ctx, server.URL)
	r := NewReleaseCache("databricks", "cli", t.TempDir())
	all, err := r.Load(ctx)
	assert.NoError(t, err)
	assert.Len(t, all, 2)
	// second Load within cacheTTL is served from the local cache file.
	// NOTE(review): this does not actually assert that no HTTP call was
	// made — the stub would serve a second request without failing.
	_, err = r.Load(ctx)
	assert.NoError(t, err)
}

View File

@ -0,0 +1,59 @@
package github
import (
"context"
"fmt"
"time"
"github.com/databricks/cli/cmd/labs/localcache"
"github.com/databricks/cli/libs/log"
)
// repositoryCacheTTL: org repository listings change rarely, so they are
// cached for a full day.
const repositoryCacheTTL = 24 * time.Hour

// NewRepositoryCache creates a disk-backed cache of the repository listing
// for a GitHub org.
// NOTE(review): exported constructor returns the unexported type
// *repositoryCache; consider exporting the type so godoc can show it.
func NewRepositoryCache(org, cacheDir string) *repositoryCache {
	filename := fmt.Sprintf("%s-repositories", org)
	return &repositoryCache{
		cache: localcache.NewLocalCache[Repositories](cacheDir, filename, repositoryCacheTTL),
		Org:   org,
	}
}

// repositoryCache lazily loads and locally caches the repositories of Org.
type repositoryCache struct {
	cache localcache.LocalCache[Repositories]
	Org   string
}

// Load returns the cached repository list for Org, refreshing it from the
// GitHub API when missing or older than repositoryCacheTTL.
func (r *repositoryCache) Load(ctx context.Context) (Repositories, error) {
	return r.cache.Load(ctx, func() (Repositories, error) {
		return getRepositories(ctx, r.Org)
	})
}
// getRepositories is considered to be a private API, as we want the usage
// to go through a cache.
func getRepositories(ctx context.Context, org string) (Repositories, error) {
	var repos Repositories
	log.Debugf(ctx, "Loading repositories for %s from GitHub API", org)
	url := fmt.Sprintf("%s/users/%s/repos", gitHubAPI, org)
	err := httpGetAndUnmarshal(ctx, url, &repos)
	return repos, err
}
// Repositories is the list of repositories in a GitHub org.
type Repositories []ghRepo

// ghRepo is the subset of the GitHub repository payload consumed here.
type ghRepo struct {
	Name        string `json:"name"`
	Description string `json:"description"`
	// NOTE(review): field name is misspelled ("Langauge"); renaming it is
	// an API change for consumers of this package, so it is only flagged.
	Langauge      string   `json:"language"`
	DefaultBranch string   `json:"default_branch"`
	Stars         int      `json:"stargazers_count"`
	IsFork        bool     `json:"fork"`
	IsArchived    bool     `json:"archived"`
	Topics        []string `json:"topics"`
	HtmlURL       string   `json:"html_url"`
	CloneURL      string   `json:"clone_url"`
	SshURL        string   `json:"ssh_url"`
	License       struct {
		Name string `json:"name"`
	} `json:"license"`
}

View File

@ -0,0 +1,30 @@
package github
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"github.com/stretchr/testify/assert"
)
// TestRepositories verifies that the repository cache fetches the org
// listing through the API override.
func TestRepositories(t *testing.T) {
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/users/databrickslabs/repos" {
			w.Write([]byte(`[{"name": "x"}]`))
			return
		}
		t.Logf("Requested: %s", r.URL.Path)
		panic("stub required")
	}))
	defer server.Close()
	ctx := context.Background()
	ctx = WithApiOverride(ctx, server.URL)
	r := NewRepositoryCache("databrickslabs", t.TempDir())
	all, err := r.Load(ctx)
	assert.NoError(t, err)
	assert.True(t, len(all) > 0)
}

21
cmd/labs/install.go Normal file
View File

@ -0,0 +1,21 @@
package labs
import (
"github.com/databricks/cli/cmd/labs/project"
"github.com/spf13/cobra"
)
// newInstallCommand wires `databricks labs install NAME`, which resolves
// the requested project (or "." for a development checkout) and installs it.
func newInstallCommand() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "install NAME",
		Args:  cobra.ExactArgs(1),
		Short: "Installs project",
	}
	cmd.RunE = func(cmd *cobra.Command, args []string) error {
		inst, err := project.NewInstaller(cmd, args[0])
		if err != nil {
			return err
		}
		return inst.Install(cmd.Context())
	}
	return cmd
}

57
cmd/labs/installed.go Normal file
View File

@ -0,0 +1,57 @@
package labs
import (
"fmt"
"github.com/databricks/cli/cmd/labs/project"
"github.com/databricks/cli/libs/cmdio"
"github.com/spf13/cobra"
)
// newInstalledCommand wires `databricks labs installed`, which renders
// every labs project found on disk along with its installed version.
func newInstalledCommand() *cobra.Command {
	return &cobra.Command{
		Use:   "installed",
		Short: "List all installed labs",
		Annotations: map[string]string{
			"template": cmdio.Heredoc(`
			Name Description Version
			{{range .Projects}}{{.Name}} {{.Description}} {{.Version}}
			{{end}}
			`),
		},
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := cmd.Context()
			// installedProject is the row shape for both table and JSON output.
			type installedProject struct {
				Name        string `json:"name"`
				Description string `json:"description"`
				Version     string `json:"version"`
			}
			projects, err := project.Installed(ctx)
			if err != nil {
				return err
			}
			var info struct {
				Projects []installedProject `json:"projects"`
			}
			for _, v := range projects {
				description := v.Description
				// keep the table narrow; NOTE(review): byte-based slicing
				// can split a multi-byte UTF-8 character — confirm
				// descriptions are ASCII.
				if len(description) > 50 {
					description = description[:50] + "..."
				}
				version, err := v.InstalledVersion(ctx)
				if err != nil {
					return fmt.Errorf("%s: %w", v.Name, err)
				}
				info.Projects = append(info.Projects, installedProject{
					Name:        v.Name,
					Description: description,
					Version:     version.Version,
				})
			}
			if len(info.Projects) == 0 {
				return fmt.Errorf("no projects installed")
			}
			return cmdio.Render(ctx, info)
		},
	}
}

View File

@ -0,0 +1,19 @@
package labs_test
import (
"context"
"testing"
"github.com/databricks/cli/internal"
"github.com/databricks/cli/libs/env"
)
// TestListsInstalledProjects runs `labs installed` against the fixture home
// directory and checks the rendered table.
func TestListsInstalledProjects(t *testing.T) {
	ctx := context.Background()
	// fixture contains a pre-installed "blueprint" project
	ctx = env.WithUserHomeDir(ctx, "project/testdata/installed-in-home")
	r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "installed")
	r.RunAndExpectOutput(`
	Name Description Version
	blueprint Blueprint Project v0.3.15
	`)
}

39
cmd/labs/labs.go Normal file
View File

@ -0,0 +1,39 @@
package labs
import (
"context"
"github.com/databricks/cli/cmd/labs/project"
"github.com/spf13/cobra"
)
// New creates the `databricks labs` command group and dynamically registers
// a subcommand for every labs project already installed in the user's home.
func New(ctx context.Context) *cobra.Command {
	cmd := &cobra.Command{
		Use:   "labs",
		Short: "Manage Databricks Labs installations",
		Long:  `Manage experimental Databricks Labs apps`,
	}
	cmd.AddGroup(&cobra.Group{
		ID:    "labs",
		Title: "Installed Databricks Labs",
	})
	cmd.AddCommand(
		newListCommand(),
		newInstallCommand(),
		newUpgradeCommand(),
		newInstalledCommand(),
		newClearCacheCommand(),
		newUninstallCommand(),
		newShowCommand(),
	)
	all, err := project.Installed(ctx)
	if err != nil {
		// NOTE(review): panicking during command-tree construction crashes
		// the entire CLI if any installed project fails to load; consider
		// degrading gracefully instead.
		panic(err)
	}
	for _, v := range all {
		v.Register(cmd)
	}
	return cmd
}

62
cmd/labs/list.go Normal file
View File

@ -0,0 +1,62 @@
package labs
import (
"context"
"github.com/databricks/cli/cmd/labs/github"
"github.com/databricks/cli/cmd/labs/project"
"github.com/databricks/cli/libs/cmdio"
"github.com/spf13/cobra"
)
// labsMeta is the row shape rendered by `databricks labs list`.
type labsMeta struct {
	Name        string `json:"name"`
	Description string `json:"description"`
	License     string `json:"license"`
}

// allRepos lists the repositories of the databrickslabs GitHub org through
// a disk-backed cache stored in the labs folder.
func allRepos(ctx context.Context) (github.Repositories, error) {
	cacheDir := project.PathInLabs(ctx)
	cache := github.NewRepositoryCache("databrickslabs", cacheDir)
	return cache.Load(ctx)
}
// newListCommand wires `databricks labs list`, which renders the
// non-archived, non-fork repositories of the databrickslabs GitHub org.
func newListCommand() *cobra.Command {
	return &cobra.Command{
		Use:   "list",
		Short: "List all labs",
		Annotations: map[string]string{
			"template": cmdio.Heredoc(`
			Name Description
			{{range .}}{{.Name}} {{.Description}}
			{{end}}
			`),
		},
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := cmd.Context()
			repositories, err := allRepos(ctx)
			if err != nil {
				return err
			}
			info := []labsMeta{}
			for _, v := range repositories {
				// skip repositories that are not installable projects
				if v.IsArchived || v.IsFork {
					continue
				}
				info = append(info, labsMeta{
					Name:        v.Name,
					Description: truncateDescription(v.Description, 50),
					License:     v.License.Name,
				})
			}
			return cmdio.Render(ctx, info)
		},
	}
}

// truncateDescription shortens s to at most limit characters, appending
// "..." when trimmed. It slices runes rather than bytes — the previous
// byte-based `s[:50]` could split a multi-byte UTF-8 character and produce
// invalid output.
func truncateDescription(s string, limit int) string {
	runes := []rune(s)
	if len(runes) <= limit {
		return s
	}
	return string(runes[:limit]) + "..."
}

19
cmd/labs/list_test.go Normal file
View File

@ -0,0 +1,19 @@
package labs_test
import (
"context"
"testing"
"github.com/databricks/cli/internal"
"github.com/databricks/cli/libs/env"
"github.com/stretchr/testify/require"
)
// TestListingWorks runs `labs list` against the fixture home directory and
// checks that a known repository shows up in the output.
func TestListingWorks(t *testing.T) {
	ctx := context.Background()
	// fixture carries a pre-populated repository cache, so no network needed
	ctx = env.WithUserHomeDir(ctx, "project/testdata/installed-in-home")
	c := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "list")
	stdout, _, err := c.Run()
	require.NoError(t, err)
	require.Contains(t, stdout.String(), "ucx")
}

View File

@ -0,0 +1,109 @@
package localcache
import (
"context"
"encoding/json"
"errors"
"fmt"
"io/fs"
"net/url"
"os"
"path/filepath"
"time"
"github.com/databricks/cli/libs/log"
)
// File and directory permissions used for cache storage.
const userRW = 0o600
const ownerRWXworldRX = 0o755

// NewLocalCache creates a JSON-file-backed cache of T stored at
// <dir>/<name>.json and considered stale after validity.
func NewLocalCache[T any](dir, name string, validity time.Duration) LocalCache[T] {
	return LocalCache[T]{
		dir:      dir,
		name:     name,
		validity: validity,
	}
}

// LocalCache persists a value of type T on disk together with the time it
// was last refreshed.
type LocalCache[T any] struct {
	name     string        // base name of the cache file (without .json)
	dir      string        // directory holding the cache file
	validity time.Duration // how long a cached value is trusted
	zero     T             // zero value of T, returned on errors
}
// Load returns the cached value, invoking refresh when the cache file is
// missing or older than the configured validity window.
func (r *LocalCache[T]) Load(ctx context.Context, refresh func() (T, error)) (T, error) {
	entry, err := r.loadCache()
	switch {
	case errors.Is(err, fs.ErrNotExist):
		// cold start: nothing on disk yet
		return r.refreshCache(ctx, refresh, r.zero)
	case err != nil:
		return r.zero, err
	case time.Since(entry.Refreshed) > r.validity:
		// stale: refresh, falling back to the stale value when offline
		return r.refreshCache(ctx, refresh, entry.Data)
	default:
		return entry.Data, nil
	}
}
// cached is the on-disk envelope for a cached value.
type cached[T any] struct {
	// We don't use the mtime of the file because it's easier for testdata
	// used in the unit tests to carry a refresh timestamp far in the
	// future without having to manipulate the mtime bit.
	Refreshed time.Time `json:"refreshed_at"`
	Data      T         `json:"data"`
}
// refreshCache invokes refresh and persists its result. A network-level
// failure (*url.Error) is treated as "offline": offlineVal is returned
// without error so stale data keeps working.
func (r *LocalCache[T]) refreshCache(ctx context.Context, refresh func() (T, error), offlineVal T) (T, error) {
	data, err := refresh()
	if err == nil {
		return r.writeCache(ctx, data)
	}
	var urlError *url.Error
	if errors.As(err, &urlError) {
		log.Warnf(ctx, "System offline. Cannot refresh cache: %s", urlError)
		return offlineVal, nil
	}
	return r.zero, fmt.Errorf("refresh: %w", err)
}
// writeCache serializes data together with the current timestamp and
// persists it at FileName(), creating the parent directory when needed.
// On success it returns data unchanged so callers can pass it through.
func (r *LocalCache[T]) writeCache(ctx context.Context, data T) (T, error) {
	// renamed from `cached` — the local previously shadowed the cached[T] type
	entry := &cached[T]{time.Now(), data}
	raw, err := json.MarshalIndent(entry, "", " ")
	if err != nil {
		return r.zero, fmt.Errorf("json marshal: %w", err)
	}
	cacheFile := r.FileName()
	// Ensure the directory upfront instead of the previous
	// write-fail-mkdir-retry sequence; MkdirAll is a no-op when the
	// directory already exists.
	cacheDir := filepath.Dir(cacheFile)
	if err := os.MkdirAll(cacheDir, ownerRWXworldRX); err != nil {
		return r.zero, fmt.Errorf("create %s: %w", cacheDir, err)
	}
	if err := os.WriteFile(cacheFile, raw, userRW); err != nil {
		return r.zero, fmt.Errorf("save cache: %w", err)
	}
	return data, nil
}
// FileName returns the full path of the cache file.
func (r *LocalCache[T]) FileName() string {
	return filepath.Join(r.dir, fmt.Sprintf("%s.json", r.name))
}

// loadCache reads and parses the cache file. The wrapped read error still
// matches fs.ErrNotExist via errors.Is, which Load treats as a cache miss.
func (r *LocalCache[T]) loadCache() (*cached[T], error) {
	jsonFile := r.FileName()
	// consistency fix: reuse jsonFile instead of calling r.FileName() twice
	raw, err := os.ReadFile(jsonFile)
	if err != nil {
		return nil, fmt.Errorf("read %s: %w", jsonFile, err)
	}
	var v cached[T]
	err = json.Unmarshal(raw, &v)
	if err != nil {
		return nil, fmt.Errorf("parse %s: %w", jsonFile, err)
	}
	return &v, nil
}

View File

@ -0,0 +1,132 @@
package localcache
import (
"context"
"errors"
"fmt"
"net/url"
"runtime"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// TestCreatesDirectoryIfNeeded verifies that a nested cache path is created
// on first write.
func TestCreatesDirectoryIfNeeded(t *testing.T) {
	ctx := context.Background()
	c := NewLocalCache[int64](t.TempDir(), "some/nested/file", 1*time.Minute)
	thing := []int64{0}
	// tick counts invocations so cache hits vs refreshes are observable
	tick := func() (int64, error) {
		thing[0] += 1
		return thing[0], nil
	}
	first, err := c.Load(ctx, tick)
	assert.NoError(t, err)
	assert.Equal(t, first, int64(1))
}

// TestImpossibleToCreateDir verifies that an unwritable cache root surfaces
// an error instead of silently succeeding.
func TestImpossibleToCreateDir(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("No /dev/null on windows")
	}
	ctx := context.Background()
	// /dev/null cannot contain a subdirectory
	c := NewLocalCache[int64]("/dev/null", "some/nested/file", 1*time.Minute)
	thing := []int64{0}
	tick := func() (int64, error) {
		thing[0] += 1
		return thing[0], nil
	}
	_, err := c.Load(ctx, tick)
	assert.Error(t, err)
}
func TestRefreshes(t *testing.T) {
if runtime.GOOS == "windows" {
t.Skip("No /dev/null on windows")
}
ctx := context.Background()
c := NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute)
thing := []int64{0}
tick := func() (int64, error) {
thing[0] += 1
return thing[0], nil
}
first, err := c.Load(ctx, tick)
assert.NoError(t, err)
second, err := c.Load(ctx, tick)
assert.NoError(t, err)
assert.Equal(t, first, second)
c.validity = 0
third, err := c.Load(ctx, tick)
assert.NoError(t, err)
assert.NotEqual(t, first, third)
}
// TestSupportOfflineSystem verifies the two offline behaviors: a stale
// cache value is kept when refresh fails with a network error, and a cold
// cache yields the zero value without error.
func TestSupportOfflineSystem(t *testing.T) {
	c := NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute)
	thing := []int64{0}
	tick := func() (int64, error) {
		thing[0] += 1
		return thing[0], nil
	}
	ctx := context.Background()
	first, err := c.Load(ctx, tick)
	assert.NoError(t, err)
	// simulate a network outage: *url.Error is the offline signal
	tick = func() (int64, error) {
		return 0, &url.Error{
			Op:  "X",
			URL: "Y",
			Err: errors.New("nope"),
		}
	}
	c.validity = 0
	// offline during refresh
	third, err := c.Load(ctx, tick)
	assert.NoError(t, err)
	assert.Equal(t, first, third)
	// fully offline
	c = NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute)
	zero, err := c.Load(ctx, tick)
	assert.NoError(t, err)
	assert.Equal(t, int64(0), zero)
}

// TestFolderDisappears verifies that an unwritable cache location surfaces
// an error from Load.
func TestFolderDisappears(t *testing.T) {
	if runtime.GOOS == "windows" {
		t.Skip("No /dev/null on windows")
	}
	c := NewLocalCache[int64]("/dev/null", "time", 1*time.Minute)
	tick := func() (int64, error) {
		now := time.Now().UnixNano()
		t.Log("TICKS")
		return now, nil
	}
	ctx := context.Background()
	_, err := c.Load(ctx, tick)
	assert.Error(t, err)
}

// TestRefreshFails verifies that a non-network refresh error is wrapped and
// propagated.
func TestRefreshFails(t *testing.T) {
	c := NewLocalCache[int64](t.TempDir(), "time", 1*time.Minute)
	tick := func() (int64, error) {
		return 0, fmt.Errorf("nope")
	}
	ctx := context.Background()
	_, err := c.Load(ctx, tick)
	assert.EqualError(t, err, "refresh: nope")
}

// TestWrongType verifies that a value JSON cannot marshal is reported as a
// "json marshal" error.
func TestWrongType(t *testing.T) {
	c := NewLocalCache[chan int](t.TempDir(), "x", 1*time.Minute)
	ctx := context.Background()
	_, err := c.Load(ctx, func() (chan int, error) {
		return make(chan int), nil
	})
	assert.EqualError(t, err, "json marshal: json: unsupported type: chan int")
}

View File

@ -0,0 +1,69 @@
package project_test
import (
"context"
"path/filepath"
"testing"
"time"
"github.com/databricks/cli/internal"
"github.com/databricks/cli/libs/env"
"github.com/databricks/cli/libs/python"
"github.com/databricks/databricks-sdk-go"
"github.com/stretchr/testify/assert"
)
// echoOut mirrors the JSON printed by the blueprint fixture's echo command.
type echoOut struct {
	Command string            `json:"command"`
	Flags   map[string]string `json:"flags"`
	Env     map[string]string `json:"env"`
}

// devEnvContext builds a context pointing at the fixture home directory and
// the locally detected Python interpreter.
func devEnvContext(t *testing.T) context.Context {
	ctx := context.Background()
	ctx = env.WithUserHomeDir(ctx, "testdata/installed-in-home")
	// errors deliberately ignored: if no Python is found, the test fails
	// later with a clearer message — TODO confirm this is the intent
	py, _ := python.DetectExecutable(ctx)
	py, _ = filepath.Abs(py)
	ctx = env.Set(ctx, "PYTHON_BIN", py)
	return ctx
}

// TestRunningBlueprintEcho checks that flags and login-derived environment
// variables are passed down to the project process.
func TestRunningBlueprintEcho(t *testing.T) {
	ctx := devEnvContext(t)
	r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "blueprint", "echo")
	var out echoOut
	r.RunAndParseJSON(&out)
	assert.Equal(t, "echo", out.Command)
	assert.Equal(t, "something", out.Flags["first"])
	assert.Equal(t, "https://accounts.cloud.databricks.com", out.Env["DATABRICKS_HOST"])
	assert.Equal(t, "cde", out.Env["DATABRICKS_ACCOUNT_ID"])
}

// TestRunningBlueprintEchoProfileWrongOverride checks that forcing a
// workspace profile onto an account-level entrypoint is rejected.
func TestRunningBlueprintEchoProfileWrongOverride(t *testing.T) {
	ctx := devEnvContext(t)
	r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "blueprint", "echo", "--profile", "workspace-profile")
	_, _, err := r.Run()
	assert.ErrorIs(t, err, databricks.ErrNotAccountClient)
}

// TestRunningCommand checks interactive stdin/stdout flow with a project
// subcommand.
func TestRunningCommand(t *testing.T) {
	ctx := devEnvContext(t)
	r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "blueprint", "foo")
	r.WithStdin()
	defer r.CloseStdin()
	r.RunBackground()
	r.WaitForTextPrinted("What is your name?", 5*time.Second)
	r.SendText("Dude\n")
	r.WaitForTextPrinted("Hello, Dude!", 5*time.Second)
}

// TestRenderingTable checks table rendering of project output.
func TestRenderingTable(t *testing.T) {
	ctx := devEnvContext(t)
	r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "blueprint", "table")
	r.RunAndExpectOutput(`
	Key Value
	First Second
	Third Fourth
	`)
}

View File

@ -0,0 +1,250 @@
package project
import (
"context"
"errors"
"fmt"
"io/fs"
"net/http"
"os"
"path/filepath"
"strings"
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/internal/build"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/env"
"github.com/databricks/cli/libs/log"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/config"
"github.com/spf13/cobra"
)
// Entrypoint describes how a labs project is launched: what authentication
// it expects and what compute it needs.
type Entrypoint struct {
	*Project
	// RequireRunningCluster makes Prepare block until the configured cluster is running.
	RequireRunningCluster bool `yaml:"require_running_cluster,omitempty"`
	// IsUnauthenticated skips login resolution entirely.
	IsUnauthenticated bool `yaml:"is_unauthenticated,omitempty"`
	// IsAccountLevel selects the account profile instead of a workspace one.
	IsAccountLevel bool `yaml:"is_account_level,omitempty"`
	// IsBundleAware lets login be taken from a Databricks Asset Bundle.
	IsBundleAware bool `yaml:"is_bundle_aware,omitempty"`
}

// Sentinel errors surfaced to the user during login resolution.
var ErrNoLoginConfig = errors.New("no login configuration found")
var ErrMissingClusterID = errors.New("missing a cluster compatible with Databricks Connect")
var ErrMissingWarehouseID = errors.New("missing a SQL warehouse")
var ErrNotInTTY = errors.New("not in an interactive terminal")

// NeedsCluster reports whether the installer requires a Databricks cluster.
// NOTE(review): as a side effect it defaults MinRuntimeVersion to "13.1"
// when only RequireDatabricksConnect is set — a mutation inside a
// predicate; confirm callers rely on this before refactoring.
func (e *Entrypoint) NeedsCluster() bool {
	if e.Installer == nil {
		return false
	}
	if e.Installer.RequireDatabricksConnect && e.Installer.MinRuntimeVersion == "" {
		e.Installer.MinRuntimeVersion = "13.1"
	}
	return e.Installer.MinRuntimeVersion != ""
}
// NeedsWarehouse reports whether the installer declares any SQL warehouse
// types, i.e. whether a warehouse must be resolved before launch.
func (e *Entrypoint) NeedsWarehouse() bool {
	return e.Installer != nil && len(e.Installer.WarehouseTypes) > 0
}
// Prepare resolves login configuration and builds the environment-variable
// map the labs project process is launched with. It fails when no usable
// login can be found or when a required cluster cannot be started.
func (e *Entrypoint) Prepare(cmd *cobra.Command) (map[string]string, error) {
	ctx := cmd.Context()
	libDir := e.EffectiveLibDir(ctx)
	environment := map[string]string{
		"DATABRICKS_CLI_VERSION":     build.GetInfo().Version,
		"DATABRICKS_LABS_CACHE_DIR":  e.CacheDir(ctx),
		"DATABRICKS_LABS_CONFIG_DIR": e.ConfigDir(ctx),
		"DATABRICKS_LABS_STATE_DIR":  e.StateDir(ctx),
		"DATABRICKS_LABS_LIB_DIR":    libDir,
	}
	if e.IsPythonProject(ctx) {
		e.preparePython(ctx, environment)
	}
	cfg, err := e.validLogin(cmd)
	if err != nil {
		return nil, fmt.Errorf("login: %w", err)
	}
	// cleanup auth profile and config file location,
	// so that we don't confuse SDKs
	cfg.Profile = ""
	cfg.ConfigFile = ""
	varNames := []string{}
	for k, v := range e.environmentFromConfig(cfg) {
		environment[k] = v
		varNames = append(varNames, k)
	}
	if e.NeedsCluster() && e.RequireRunningCluster {
		err = e.ensureRunningCluster(ctx, cfg)
		if err != nil {
			return nil, fmt.Errorf("running cluster: %w", err)
		}
	}
	log.Debugf(ctx, "Passing down environment variables: %s", strings.Join(varNames, ", "))
	return environment, nil
}

// preparePython points PATH at the project's virtual environment and sets
// PYTHONPATH so the sandboxed library directory (including its "src"
// layout) is importable by the project process.
func (e *Entrypoint) preparePython(ctx context.Context, environment map[string]string) {
	venv := e.virtualEnvPath(ctx)
	environment["PATH"] = e.joinPaths(filepath.Join(venv, "bin"), env.Get(ctx, "PATH"))
	// PYTHONPATH extends the standard lookup locations for module files. It follows the same structure as
	// the shell's PATH, where you specify one or more directory paths separated by the appropriate delimiter
	// (such as colons for Unix or semicolons for Windows). If a directory listed in PYTHONPATH doesn't exist,
	// it is disregarded without any notifications.
	//
	// Beyond regular directories, individual entries in PYTHONPATH can point to zipfiles that contain pure
	// Python modules in either their source or compiled forms. It's important to note that extension modules
	// cannot be imported from zipfiles.
	//
	// The initial search path varies depending on your installation but typically commences with the
	// prefix/lib/pythonversion path (as indicated by PYTHONHOME). This default path is always included
	// in PYTHONPATH.
	//
	// An extra directory can be included at the beginning of the search path, coming before PYTHONPATH,
	// as explained in the Interface options section. You can control the search path from within a Python
	// script using the sys.path variable.
	//
	// Here we are also supporting the "src" layout for python projects.
	//
	// See https://docs.python.org/3/using/cmdline.html#envvar-PYTHONPATH
	libDir := e.EffectiveLibDir(ctx)
	// The intention for every install is to be sandboxed - not dependent on anything else than Python binary.
	// Having ability to override PYTHONPATH in the mix will break this assumption. Need strong evidence that
	// this is really needed.
	environment["PYTHONPATH"] = e.joinPaths(libDir, filepath.Join(libDir, "src"))
}

// ensureRunningCluster starts the configured cluster (if stopped) while
// showing a spinner, blocking until it is running.
func (e *Entrypoint) ensureRunningCluster(ctx context.Context, cfg *config.Config) error {
	feedback := cmdio.Spinner(ctx)
	defer close(feedback)
	w, err := databricks.NewWorkspaceClient((*databricks.Config)(cfg))
	if err != nil {
		return fmt.Errorf("workspace client: %w", err)
	}
	// TODO: add in-progress callback to EnsureClusterIsRunning() in SDK
	feedback <- "Ensuring the cluster is running..."
	err = w.Clusters.EnsureClusterIsRunning(ctx, cfg.ClusterID)
	if err != nil {
		return fmt.Errorf("ensure running: %w", err)
	}
	return nil
}
// joinPaths joins path entries with the platform's PATH list separator.
func (e *Entrypoint) joinPaths(paths ...string) string {
	return strings.Join(paths, string(os.PathListSeparator))
}

// envAwareConfig builds an SDK config that resolves credentials from the
// process environment first, then attributes, then ~/.databrickscfg.
func (e *Entrypoint) envAwareConfig(ctx context.Context) *config.Config {
	return &config.Config{
		ConfigFile: filepath.Join(env.UserHomeDir(ctx), ".databrickscfg"),
		Loaders: []config.Loader{
			env.NewConfigLoader(ctx),
			config.ConfigAttributes,
			config.ConfigFile,
		},
	}
}

// envAwareConfigWithProfile is envAwareConfig pinned to a named profile.
func (e *Entrypoint) envAwareConfigWithProfile(ctx context.Context, profile string) *config.Config {
	cfg := e.envAwareConfig(ctx)
	cfg.Profile = profile
	return cfg
}
// getLoginConfig picks the login source for this entrypoint, in priority
// order: explicit --profile override, environment variables, a Databricks
// Asset Bundle (when IsBundleAware), or the saved login.json profile
// (account- or workspace-level depending on IsAccountLevel).
func (e *Entrypoint) getLoginConfig(cmd *cobra.Command) (*loginConfig, *config.Config, error) {
	ctx := cmd.Context()
	// it's okay for this config file not to exist, because some environments,
	// like GitHub Actions, don't (need) to have it. There's a small downside of
	// a warning log message from within Go SDK.
	profileOverride := e.profileOverride(cmd)
	if profileOverride != "" {
		log.Infof(ctx, "Overriding login profile: %s", profileOverride)
		return &loginConfig{}, e.envAwareConfigWithProfile(ctx, profileOverride), nil
	}
	lc, err := e.loadLoginConfig(ctx)
	// missing login.json is only fatal when no other source applies
	isNoLoginConfig := errors.Is(err, fs.ErrNotExist)
	defaultConfig := e.envAwareConfig(ctx)
	if isNoLoginConfig && !e.IsBundleAware && e.isAuthConfigured(defaultConfig) {
		log.Debugf(ctx, "Login is configured via environment variables")
		return &loginConfig{}, defaultConfig, nil
	}
	if isNoLoginConfig && !e.IsBundleAware {
		return nil, nil, ErrNoLoginConfig
	}
	if !isNoLoginConfig && err != nil {
		return nil, nil, fmt.Errorf("load: %w", err)
	}
	if e.IsAccountLevel {
		log.Debugf(ctx, "Using account-level login profile: %s", lc.AccountProfile)
		return lc, e.envAwareConfigWithProfile(ctx, lc.AccountProfile), nil
	}
	if e.IsBundleAware {
		err = root.TryConfigureBundle(cmd, []string{})
		if err != nil {
			return nil, nil, fmt.Errorf("bundle: %w", err)
		}
		// only use the bundle when one is actually present in the cwd
		if b := bundle.GetOrNil(cmd.Context()); b != nil {
			log.Infof(ctx, "Using login configuration from Databricks Asset Bundle")
			return &loginConfig{}, b.WorkspaceClient().Config, nil
		}
	}
	log.Debugf(ctx, "Using workspace-level login profile: %s", lc.WorkspaceProfile)
	return lc, e.envAwareConfigWithProfile(ctx, lc.WorkspaceProfile), nil
}
// validLogin resolves and validates the SDK config for this entrypoint,
// filling in cluster/warehouse IDs from the saved labs login where the
// profile itself doesn't carry them.
func (e *Entrypoint) validLogin(cmd *cobra.Command) (*config.Config, error) {
	if e.IsUnauthenticated {
		// project declared it needs no credentials at all
		return &config.Config{}, nil
	}
	lc, cfg, err := e.getLoginConfig(cmd)
	if err != nil {
		return nil, fmt.Errorf("login config: %w", err)
	}
	err = cfg.EnsureResolved()
	if err != nil {
		return nil, err
	}
	// merge ~/.databrickscfg and ~/.databricks/labs/x/config/login.json when
	// it comes to project-specific configuration
	if e.NeedsCluster() && cfg.ClusterID == "" {
		cfg.ClusterID = lc.ClusterID
	}
	if e.NeedsWarehouse() && cfg.WarehouseID == "" {
		cfg.WarehouseID = lc.WarehouseID
	}
	isACC := cfg.IsAccountClient()
	if e.IsAccountLevel && !isACC {
		return nil, databricks.ErrNotAccountClient
	}
	// cluster/warehouse requirements only apply to workspace-level logins
	if e.NeedsCluster() && !isACC && cfg.ClusterID == "" {
		return nil, ErrMissingClusterID
	}
	if e.NeedsWarehouse() && !isACC && cfg.WarehouseID == "" {
		return nil, ErrMissingWarehouseID
	}
	return cfg, nil
}
// environmentFromConfig flattens all non-empty SDK config attributes into
// their corresponding environment variables.
func (e *Entrypoint) environmentFromConfig(cfg *config.Config) map[string]string {
	out := map[string]string{}
	for _, attr := range config.ConfigAttributes {
		if attr.IsZero(cfg) {
			continue
		}
		value := attr.GetString(cfg)
		for _, name := range attr.EnvVars {
			out[name] = value
		}
	}
	return out
}
// isAuthConfigured probes whether cfg can produce credentials by attempting
// to authenticate a throwaway HTTP request.
func (e *Entrypoint) isAuthConfigured(cfg *config.Config) bool {
	r := &http.Request{Header: http.Header{}}
	err := cfg.Authenticate(r.WithContext(context.Background()))
	return err == nil
}

141
cmd/labs/project/fetcher.go Normal file
View File

@ -0,0 +1,141 @@
package project
import (
"context"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/databricks/cli/cmd/labs/github"
"github.com/databricks/cli/libs/log"
"github.com/fatih/color"
"github.com/spf13/cobra"
)
// installable is anything that can install itself — either a released
// project or a local development checkout.
type installable interface {
	Install(ctx context.Context) error
}

// devInstallation installs the project checked out in the current working
// directory (`labs install .`).
type devInstallation struct {
	*Project
	*cobra.Command
}

// Install runs the project's install hook. When no login configuration
// exists yet, it first interactively collects workspace and account
// profiles and persists them.
func (d *devInstallation) Install(ctx context.Context) error {
	if d.Installer == nil {
		return nil
	}
	_, err := d.Installer.validLogin(d.Command)
	if errors.Is(err, ErrNoLoginConfig) {
		cfg := d.Installer.envAwareConfig(ctx)
		lc := &loginConfig{Entrypoint: d.Installer.Entrypoint}
		_, err = lc.askWorkspace(ctx, cfg)
		if err != nil {
			return fmt.Errorf("ask for workspace: %w", err)
		}
		err = lc.askAccountProfile(ctx, cfg)
		if err != nil {
			return fmt.Errorf("ask for account: %w", err)
		}
		err = lc.EnsureFoldersExist(ctx)
		if err != nil {
			return fmt.Errorf("folders: %w", err)
		}
		err = lc.save(ctx)
		if err != nil {
			return fmt.Errorf("save: %w", err)
		}
	}
	// NOTE(review): a validLogin error other than ErrNoLoginConfig is
	// silently discarded here — confirm that is intentional.
	return d.Installer.runHook(d.Command)
}
// NewInstaller returns an installable for the given project name. The
// special name "." loads the project from the current working directory in
// development mode; otherwise "name" or "name@version" selects a release of
// the databrickslabs GitHub repository with that name.
func NewInstaller(cmd *cobra.Command, name string) (installable, error) {
	if name == "." {
		wd, err := os.Getwd()
		if err != nil {
			return nil, fmt.Errorf("working directory: %w", err)
		}
		prj, err := Load(cmd.Context(), filepath.Join(wd, "labs.yml"))
		if err != nil {
			return nil, fmt.Errorf("load: %w", err)
		}
		cmd.PrintErrln(color.YellowString("Installing %s in development mode from %s", prj.Name, wd))
		return &devInstallation{
			Project: prj,
			Command: cmd,
		}, nil
	}
	// "name@version" pins a specific release; bare "name" means latest
	name, version, ok := strings.Cut(name, "@")
	if !ok {
		version = "latest"
	}
	f := &fetcher{name}
	version, err := f.checkReleasedVersions(cmd, version)
	if err != nil {
		return nil, fmt.Errorf("version: %w", err)
	}
	prj, err := f.loadRemoteProjectDefinition(cmd, version)
	if err != nil {
		return nil, fmt.Errorf("remote: %w", err)
	}
	return &installer{
		Project: prj,
		version: version,
		cmd:     cmd,
	}, nil
}
// NewUpgrader returns an installer pinned to the latest released version of
// the given project, with its folder pointing at the installed location.
func NewUpgrader(cmd *cobra.Command, name string) (*installer, error) {
	f := &fetcher{name}
	version, err := f.checkReleasedVersions(cmd, "latest")
	if err != nil {
		return nil, fmt.Errorf("version: %w", err)
	}
	prj, err := f.loadRemoteProjectDefinition(cmd, version)
	if err != nil {
		return nil, fmt.Errorf("remote: %w", err)
	}
	// upgrades always operate on the installed copy, not a dev checkout
	prj.folder = PathInLabs(cmd.Context(), name)
	return &installer{
		Project: prj,
		version: version,
		cmd:     cmd,
	}, nil
}
// fetcher retrieves released versions and remote project definitions for a
// databrickslabs repository by name.
type fetcher struct {
	name string
}
// checkReleasedVersions resolves the requested version against the (cached)
// list of GitHub releases: an exact match is returned as-is, "latest" maps
// to the newest release, and unknown versions are allowed with a warning.
func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (string, error) {
	ctx := cmd.Context()
	cacheDir := PathInLabs(ctx, f.name, "cache")
	// `databricks labs install X` doesn't know which exact version to fetch, so first
	// we fetch all versions and then pick the latest one dynamically.
	versions, err := github.NewReleaseCache("databrickslabs", f.name, cacheDir).Load(ctx)
	if err != nil {
		return "", fmt.Errorf("versions: %w", err)
	}
	for _, v := range versions {
		if v.Version == version {
			return version, nil
		}
	}
	// releases are assumed to be ordered newest-first
	if version == "latest" && len(versions) > 0 {
		log.Debugf(ctx, "Latest %s version is: %s", f.name, versions[0].Version)
		return versions[0].Version, nil
	}
	cmd.PrintErrln(color.YellowString("[WARNING] Installing unreleased version: %s", version))
	return version, nil
}
// loadRemoteProjectDefinition fetches and parses the labs.yml project
// definition for the given release ref from the GitHub repository.
// Receiver renamed i -> f for consistency with checkReleasedVersions.
func (f *fetcher) loadRemoteProjectDefinition(cmd *cobra.Command, version string) (*Project, error) {
	ctx := cmd.Context()
	raw, err := github.ReadFileFromRef(ctx, "databrickslabs", f.name, version, "labs.yml")
	if err != nil {
		return nil, fmt.Errorf("read labs.yml from GitHub: %w", err)
	}
	return readFromBytes(ctx, raw)
}

View File

@ -0,0 +1,35 @@
package project
import (
"context"
"encoding/json"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"github.com/databricks/cli/libs/env"
)
// PathInLabs joins the given path segments under the per-user labs root,
// i.e. <user home>/.databricks/labs.
func PathInLabs(ctx context.Context, dirs ...string) string {
	homeDir := env.UserHomeDir(ctx) // fixed misspelled local "homdeDir"
	prefix := []string{homeDir, ".databricks", "labs"}
	return filepath.Join(append(prefix, dirs...)...)
}
func tryLoadAndParseJSON[T any](jsonFile string) (*T, error) {
raw, err := os.ReadFile(jsonFile)
if errors.Is(err, fs.ErrNotExist) {
return nil, err
}
if err != nil {
return nil, fmt.Errorf("read %s: %w", jsonFile, err)
}
var v T
err = json.Unmarshal(raw, &v)
if err != nil {
return nil, fmt.Errorf("parse %s: %w", jsonFile, err)
}
return &v, nil
}

View File

@ -0,0 +1,13 @@
package project
import (
"log/slog"
"os"
)
// init replaces the process-wide default slog logger with a text handler
// writing to stderr at debug level.
// NOTE(review): init() with a global side effect applies to every importer
// of this package — confirm the verbosity is intended outside of tests.
func init() {
	slog.SetDefault(slog.New(
		slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
			Level: slog.LevelDebug,
		})))
}

View File

@ -0,0 +1,58 @@
package project
import (
"context"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"github.com/databricks/cli/folders"
"github.com/databricks/cli/libs/log"
)
// projectInDevMode loads the labs.yml project from the current working
// directory or the nearest parent that contains one. The error from
// folders.FindDirWithLeaf is returned as-is when nothing is found
// (presumably fs.ErrNotExist-compatible — callers probe with errors.Is).
func projectInDevMode(ctx context.Context) (*Project, error) {
	cwd, err := os.Getwd()
	if err != nil {
		return nil, err
	}
	folder, err := folders.FindDirWithLeaf(cwd, "labs.yml")
	if err != nil {
		return nil, err
	}
	// log the directory that actually contains labs.yml, which may be a
	// parent of the current working directory (previously logged cwd)
	log.Debugf(ctx, "Found project under development in: %s", folder)
	return Load(ctx, filepath.Join(folder, "labs.yml"))
}
// Installed returns every labs project found on this machine: the project
// under development in (a parent of) the current directory, if any, plus
// every project unpacked under ~/.databricks/labs.
func Installed(ctx context.Context) (projects []*Project, err error) {
	labsDir, err := os.ReadDir(PathInLabs(ctx))
	if err != nil && !errors.Is(err, fs.ErrNotExist) {
		return nil, err
	}
	projectDev, err := projectInDevMode(ctx)
	if err != nil && !errors.Is(err, fs.ErrNotExist) {
		return nil, err
	}
	if err == nil {
		projects = append(projects, projectDev)
	}
	for _, v := range labsDir {
		if !v.IsDir() {
			continue
		}
		// the development copy takes precedence over the installed one
		if projectDev != nil && v.Name() == projectDev.Name {
			continue
		}
		labsYml := PathInLabs(ctx, v.Name(), "lib", "labs.yml")
		prj, err := Load(ctx, labsYml)
		if errors.Is(err, fs.ErrNotExist) {
			// folder without labs.yml is not a project; skip it
			continue
		}
		if err != nil {
			return nil, fmt.Errorf("%s: %w", v.Name(), err)
		}
		projects = append(projects, prj)
	}
	return projects, nil
}

View File

@ -0,0 +1,19 @@
package project
import (
"context"
"testing"
"github.com/databricks/cli/libs/env"
"github.com/stretchr/testify/assert"
)
// TestInstalled verifies that projects unpacked under the fake home
// directory in testdata are discovered by Installed.
func TestInstalled(t *testing.T) {
	ctx := context.Background()
	ctx = env.WithUserHomeDir(ctx, "testdata/installed-in-home")
	all, err := Installed(ctx)
	assert.NoError(t, err)
	assert.Len(t, all, 1)
	assert.Equal(t, "blueprint", all[0].Name)
}

View File

@ -0,0 +1,286 @@
package project
import (
"bytes"
"context"
"errors"
"fmt"
"os"
"strings"
"github.com/databricks/cli/cmd/labs/github"
"github.com/databricks/cli/cmd/labs/unpack"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/databrickscfg"
"github.com/databricks/cli/libs/databrickscfg/cfgpickers"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/process"
"github.com/databricks/cli/libs/python"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/databricks/databricks-sdk-go/service/sql"
"github.com/fatih/color"
"github.com/spf13/cobra"
)
const ownerRWXworldRX = 0o755
// whTypes is the set of SQL warehouse types acceptable to a hook.
type whTypes []sql.EndpointInfoWarehouseType

// hook describes an install or uninstall script declared in labs.yml,
// together with its runtime requirements.
type hook struct {
	*Entrypoint `yaml:",inline"`
	// Script is the hook script path, relative to the library root.
	Script string `yaml:"script"`
	// RequireDatabricksConnect makes the installer pin databricks-connect
	// to the selected cluster's runtime version.
	RequireDatabricksConnect bool `yaml:"require_databricks_connect,omitempty"`
	MinRuntimeVersion string `yaml:"min_runtime_version,omitempty"`
	WarehouseTypes whTypes `yaml:"warehouse_types,omitempty"`
}
// RequireRunningCluster reports whether the hook's entrypoint demands a
// running cluster; a hook without an entrypoint never does.
func (h *hook) RequireRunningCluster() bool {
	return h.Entrypoint != nil && h.Entrypoint.RequireRunningCluster
}
// HasPython reports whether the hook script is a Python file.
func (h *hook) HasPython() bool {
	return strings.HasSuffix(h.Script, ".py")
}
// runHook executes the hook script, if any, from the effective library
// directory, forwarding the command's standard streams and the prepared
// environment. Python scripts run through the project's venv interpreter.
func (h *hook) runHook(cmd *cobra.Command) error {
	if h.Script == "" {
		return nil
	}
	ctx := cmd.Context()
	envs, err := h.Prepare(cmd)
	if err != nil {
		return fmt.Errorf("prepare: %w", err)
	}
	libDir := h.EffectiveLibDir(ctx)
	args := []string{}
	// use the HasPython helper instead of duplicating the suffix check
	if h.HasPython() {
		args = append(args, h.virtualEnvPython(ctx))
	}
	return process.Forwarded(ctx,
		append(args, h.Script),
		cmd.InOrStdin(),
		cmd.OutOrStdout(),
		cmd.ErrOrStderr(),
		process.WithDir(libDir),
		process.WithEnvs(envs))
}
// installer installs a specific released version of a project from GitHub.
type installer struct {
	*Project
	// version is the release tag being installed, e.g. "v0.3.15".
	version string
	// command instance is used for:
	// - auth profile flag override
	// - standard input, output, and error streams
	cmd *cobra.Command
}
// Install performs a full installation of a released project: ensures the
// target folders, logs in (falling back to environment-based auth when no
// ~/.databrickscfg exists), downloads and unpacks the library, prepares the
// Python virtual environment, records the version, and runs the hook.
func (i *installer) Install(ctx context.Context) error {
	err := i.EnsureFoldersExist(ctx)
	if err != nil {
		return fmt.Errorf("folders: %w", err)
	}
	i.folder = PathInLabs(ctx, i.Name)
	w, err := i.login(ctx)
	if err != nil && errors.Is(err, databrickscfg.ErrNoConfiguration) {
		// NOTE(review): this dereferences i.Installer, which may be nil for
		// projects without an install hook — confirm this path is reachable
		// only when a hook is declared.
		cfg := i.Installer.envAwareConfig(ctx)
		w, err = databricks.NewWorkspaceClient((*databricks.Config)(cfg))
		if err != nil {
			return fmt.Errorf("no ~/.databrickscfg: %w", err)
		}
	} else if err != nil {
		return fmt.Errorf("login: %w", err)
	}
	err = i.downloadLibrary(ctx)
	if err != nil {
		return fmt.Errorf("lib: %w", err)
	}
	err = i.setupPythonVirtualEnvironment(ctx, w)
	if err != nil {
		return fmt.Errorf("python: %w", err)
	}
	err = i.recordVersion(ctx)
	if err != nil {
		return fmt.Errorf("record version: %w", err)
	}
	// TODO: failing install hook for "clean installations" (not upgrades)
	// should trigger removal of the project, otherwise users end up with
	// misconfigured CLIs
	err = i.runInstallHook(ctx)
	if err != nil {
		return fmt.Errorf("installer: %w", err)
	}
	return nil
}
// Upgrade re-downloads the library, records the new version, and re-runs
// the install hook.
func (i *installer) Upgrade(ctx context.Context) error {
	if err := i.downloadLibrary(ctx); err != nil {
		return fmt.Errorf("lib: %w", err)
	}
	if err := i.recordVersion(ctx); err != nil {
		return fmt.Errorf("record version: %w", err)
	}
	if err := i.runInstallHook(ctx); err != nil {
		return fmt.Errorf("installer: %w", err)
	}
	return nil
}
// warningf prints a yellow formatted warning to the command's error stream.
func (i *installer) warningf(text string, v ...any) {
	i.cmd.PrintErrln(color.YellowString(text, v...))
}
// cleanupLib wipes the library directory and recreates it empty, so that a
// fresh download starts from a clean slate.
func (i *installer) cleanupLib(ctx context.Context) error {
	libDir := i.LibDir(ctx)
	if err := os.RemoveAll(libDir); err != nil {
		return fmt.Errorf("remove all: %w", err)
	}
	return os.MkdirAll(libDir, ownerRWXworldRX)
}
// recordVersion persists the installed release tag into the state folder.
func (i *installer) recordVersion(ctx context.Context) error {
	return i.writeVersionFile(ctx, i.version)
}
// login resolves authentication for the installer. In non-interactive mode
// it returns (nil, nil) and relies on environment-based configuration;
// otherwise it prompts for workspace, cluster, warehouse, and account
// selections as required by the project and persists the choices.
func (i *installer) login(ctx context.Context) (*databricks.WorkspaceClient, error) {
	if !cmdio.IsInteractive(ctx) {
		log.Debugf(ctx, "Skipping workspace profile prompts in non-interactive mode")
		// callers must tolerate a nil workspace client on this path
		return nil, nil
	}
	cfg, err := i.metaEntrypoint(ctx).validLogin(i.cmd)
	if errors.Is(err, ErrNoLoginConfig) {
		cfg = i.Installer.envAwareConfig(ctx)
	} else if err != nil {
		return nil, fmt.Errorf("valid: %w", err)
	}
	if !i.HasAccountLevelCommands() && cfg.IsAccountClient() {
		return nil, fmt.Errorf("got account-level client, but no account-level commands")
	}
	lc := &loginConfig{Entrypoint: i.Installer.Entrypoint}
	w, err := lc.askWorkspace(ctx, cfg)
	if err != nil {
		return nil, fmt.Errorf("ask for workspace: %w", err)
	}
	err = lc.askAccountProfile(ctx, cfg)
	if err != nil {
		return nil, fmt.Errorf("ask for account: %w", err)
	}
	err = lc.save(ctx)
	if err != nil {
		return nil, fmt.Errorf("save: %w", err)
	}
	return w, nil
}
// downloadLibrary cleans the library directory and downloads the project
// distribution into it. Only GitHub zipballs are supported for now.
func (i *installer) downloadLibrary(ctx context.Context) error {
	feedback := cmdio.Spinner(ctx)
	defer close(feedback)
	feedback <- "Cleaning up previous installation if necessary"
	err := i.cleanupLib(ctx)
	if err != nil {
		return fmt.Errorf("cleanup: %w", err)
	}
	libTarget := i.LibDir(ctx)
	// we may support wheels, jars, and golang binaries. but those are not zipballs
	if i.IsZipball() {
		feedback <- fmt.Sprintf("Downloading and unpacking zipball for %s", i.version)
		return i.downloadAndUnpackZipball(ctx, libTarget)
	}
	// errors.New: the message has no formatting verbs (staticcheck S1039)
	return errors.New("we only support zipballs for now")
}
// downloadAndUnpackZipball fetches the zipball of the pinned version from
// GitHub (buffered in memory) and unpacks it into the library directory.
func (i *installer) downloadAndUnpackZipball(ctx context.Context, libTarget string) error {
	raw, err := github.DownloadZipball(ctx, "databrickslabs", i.Name, i.version)
	if err != nil {
		return fmt.Errorf("download zipball from GitHub: %w", err)
	}
	zipball := unpack.GitHubZipball{Reader: bytes.NewBuffer(raw)}
	log.Debugf(ctx, "Unpacking zipball to: %s", libTarget)
	return zipball.UnpackTo(libTarget)
}
// setupPythonVirtualEnvironment creates a fresh venv in the project state
// directory using the newest Python satisfying MinPython, optionally pins
// databricks-connect to the selected cluster's runtime version, and then
// installs the project's own dependencies. No-op for non-Python projects.
func (i *installer) setupPythonVirtualEnvironment(ctx context.Context, w *databricks.WorkspaceClient) error {
	if !i.HasPython() {
		return nil
	}
	feedback := cmdio.Spinner(ctx)
	defer close(feedback)
	feedback <- "Detecting all installed Python interpreters on the system"
	pythonInterpreters, err := python.DetectInterpreters(ctx)
	if err != nil {
		return fmt.Errorf("detect: %w", err)
	}
	py, err := pythonInterpreters.AtLeast(i.MinPython)
	if err != nil {
		return fmt.Errorf("min version: %w", err)
	}
	log.Debugf(ctx, "Detected Python %s at: %s", py.Version, py.Path)
	venvPath := i.virtualEnvPath(ctx)
	log.Debugf(ctx, "Creating Python Virtual Environment at: %s", venvPath)
	feedback <- fmt.Sprintf("Creating Virtual Environment with Python %s", py.Version)
	_, err = process.Background(ctx, []string{py.Path, "-m", "venv", venvPath})
	if err != nil {
		return fmt.Errorf("create venv: %w", err)
	}
	if i.Installer != nil && i.Installer.RequireDatabricksConnect {
		feedback <- "Determining Databricks Connect version"
		// NOTE(review): w may be nil when login ran in non-interactive mode —
		// confirm require_databricks_connect projects cannot reach this path
		// without a workspace client.
		cluster, err := w.Clusters.Get(ctx, compute.GetClusterRequest{
			ClusterId: w.Config.ClusterID,
		})
		if err != nil {
			return fmt.Errorf("cluster: %w", err)
		}
		runtimeVersion, ok := cfgpickers.GetRuntimeVersion(*cluster)
		if !ok {
			return fmt.Errorf("unsupported runtime: %s", cluster.SparkVersion)
		}
		feedback <- fmt.Sprintf("Installing Databricks Connect v%s", runtimeVersion)
		pipSpec := fmt.Sprintf("databricks-connect==%s", runtimeVersion)
		err = i.installPythonDependencies(ctx, pipSpec)
		if err != nil {
			return fmt.Errorf("dbconnect: %w", err)
		}
	}
	feedback <- "Installing Python library dependencies"
	return i.installPythonDependencies(ctx, ".")
}
// installPythonDependencies runs `pip install <spec>` inside the project's
// virtual environment with the library directory as the working directory.
// No-op for projects without setup.py/pyproject.toml.
func (i *installer) installPythonDependencies(ctx context.Context, spec string) error {
	if !i.IsPythonProject(ctx) {
		return nil
	}
	libDir := i.LibDir(ctx)
	log.Debugf(ctx, "Installing Python dependencies for: %s", libDir)
	// maybe we'll need to add call one of the two scripts:
	// - python3 -m ensurepip --default-pip
	// - curl -o https://bootstrap.pypa.io/get-pip.py | python3
	var buf bytes.Buffer
	_, err := process.Background(ctx,
		[]string{i.virtualEnvPython(ctx), "-m", "pip", "install", spec},
		process.WithCombinedOutput(&buf),
		process.WithDir(libDir))
	if err != nil {
		// pip output is data, not a format string: route it through %s so a
		// literal '%' in the output cannot be misread as a formatting verb
		i.warningf("%s", buf.String())
		return fmt.Errorf("failed to install dependencies of %s", spec)
	}
	return nil
}
// runInstallHook executes the project's install hook script, if labs.yml
// declares one.
func (i *installer) runInstallHook(ctx context.Context) error {
	if i.Installer == nil || i.Installer.Script == "" {
		return nil
	}
	log.Debugf(ctx, "Launching installer script %s in %s", i.Installer.Script, i.LibDir(ctx))
	return i.Installer.runHook(i.cmd)
}

View File

@ -0,0 +1,415 @@
package project_test
import (
"archive/zip"
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"io/fs"
"net/http"
"net/http/httptest"
"os"
"path"
"path/filepath"
"strings"
"testing"
"time"
"github.com/databricks/cli/cmd/labs/github"
"github.com/databricks/cli/cmd/labs/project"
"github.com/databricks/cli/internal"
"github.com/databricks/cli/libs/env"
"github.com/databricks/cli/libs/python"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/databricks/databricks-sdk-go/service/iam"
"github.com/databricks/databricks-sdk-go/service/sql"
"github.com/stretchr/testify/require"
)
const ownerRWXworldRX = 0o755
const ownerRW = 0o600
func zipballFromFolder(src string) ([]byte, error) {
var buf bytes.Buffer
zw := zip.NewWriter(&buf)
rootDir := path.Base(src) // this is required to emulate github ZIP downloads
err := filepath.Walk(src, func(filePath string, info os.FileInfo, err error) error {
if err != nil {
return err
}
relpath, err := filepath.Rel(src, filePath)
if err != nil {
return err
}
relpath = path.Join(rootDir, relpath)
if info.IsDir() {
_, err = zw.Create(relpath + "/")
return err
}
file, err := os.Open(filePath)
if err != nil {
return err
}
defer file.Close()
f, err := zw.Create(relpath)
if err != nil {
return err
}
_, err = io.Copy(f, file)
return err
})
if err != nil {
return nil, err
}
err = zw.Close()
if err != nil {
return nil, err
}
return buf.Bytes(), nil
}
// copyTestdata recursively copies the given testdata folder into a fresh
// temporary directory and returns its path.
func copyTestdata(t *testing.T, name string) string {
	// TODO: refactor fs.cp command into a reusable util
	tempDir := t.TempDir()
	name = strings.ReplaceAll(name, "/", string(os.PathSeparator))
	err := filepath.WalkDir(name, func(path string, d fs.DirEntry, err error) error {
		require.NoError(t, err)
		dst := strings.TrimPrefix(path, name)
		if dst == "" {
			// skip the root folder itself
			return nil
		}
		if d.IsDir() {
			err := os.MkdirAll(filepath.Join(tempDir, dst), ownerRWXworldRX)
			require.NoError(t, err)
			return nil
		}
		in, err := os.Open(path)
		require.NoError(t, err)
		defer in.Close()
		out, err := os.Create(filepath.Join(tempDir, dst))
		require.NoError(t, err)
		defer out.Close()
		_, err = io.Copy(out, in)
		require.NoError(t, err)
		return nil
	})
	require.NoError(t, err)
	return tempDir
}
// installerContext builds a context that points GitHub API and content URLs
// at the given test server, uses a fresh temp dir as the user home, and
// pre-seeds the release cache with v0.3.15 so no real GitHub call is made.
func installerContext(t *testing.T, server *httptest.Server) context.Context {
	ctx := context.Background()
	ctx = github.WithApiOverride(ctx, server.URL)
	ctx = github.WithUserContentOverride(ctx, server.URL)
	ctx = env.WithUserHomeDir(ctx, t.TempDir())
	// trick release cache to think it went to github already
	cachePath := project.PathInLabs(ctx, "blueprint", "cache")
	err := os.MkdirAll(cachePath, ownerRWXworldRX)
	require.NoError(t, err)
	bs := []byte(`{"refreshed_at": "2033-01-01T00:00:00.92857+02:00","data": [{"tag_name": "v0.3.15"}]}`)
	err = os.WriteFile(filepath.Join(cachePath, "databrickslabs-blueprint-releases.json"), bs, ownerRW)
	require.NoError(t, err)
	return ctx
}
// respondWithJSON marshals v and writes it to the test response, failing
// the test on marshalling or write errors.
func respondWithJSON(t *testing.T, w http.ResponseWriter, v any) {
	raw, err := json.Marshal(v)
	// previously wrapped in a redundant `if err != nil` — require.NoError
	// is already a no-op on nil errors
	require.NoError(t, err)
	_, err = w.Write(raw)
	require.NoError(t, err)
}
type fileTree struct {
Path string
MaxDepth int
}
func (ft fileTree) String() string {
lines := ft.listFiles(ft.Path, ft.MaxDepth)
return strings.Join(lines, "\n")
}
func (ft fileTree) listFiles(dir string, depth int) (lines []string) {
if ft.MaxDepth > 0 && depth > ft.MaxDepth {
return []string{fmt.Sprintf("deeper than %d levels", ft.MaxDepth)}
}
fileInfo, err := os.ReadDir(dir)
if err != nil {
return []string{err.Error()}
}
for _, entry := range fileInfo {
lines = append(lines, fmt.Sprintf("%s%s", ft.getIndent(depth), entry.Name()))
if entry.IsDir() {
subdir := filepath.Join(dir, entry.Name())
lines = append(lines, ft.listFiles(subdir, depth+1)...)
}
}
return lines
}
func (ft fileTree) getIndent(depth int) string {
return "│" + strings.Repeat(" ", depth*2) + "├─ "
}
// TestInstallerWorksForReleases installs the "blueprint" project from a
// faked GitHub release (v0.3.15) against a stubbed workspace, and asserts
// that the install hook's output is printed.
func TestInstallerWorksForReleases(t *testing.T) {
	defer func() {
		if !t.Failed() {
			return
		}
		// dump the temp-dir tree to aid debugging failed installs
		t.Logf("file tree:\n%s", fileTree{
			Path: filepath.Dir(t.TempDir()),
		})
	}()
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/databrickslabs/blueprint/v0.3.15/labs.yml" {
			raw, err := os.ReadFile("testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml")
			if err != nil {
				panic(err)
			}
			w.Write(raw)
			return
		}
		if r.URL.Path == "/repos/databrickslabs/blueprint/zipball/v0.3.15" {
			raw, err := zipballFromFolder("testdata/installed-in-home/.databricks/labs/blueprint/lib")
			if err != nil {
				panic(err)
			}
			w.Header().Add("Content-Type", "application/octet-stream")
			w.Write(raw)
			return
		}
		if r.URL.Path == "/api/2.0/clusters/get" {
			respondWithJSON(t, w, &compute.ClusterDetails{
				State: compute.StateRunning,
			})
			return
		}
		// any other request is unexpected and fails the test
		t.Logf("Requested: %s", r.URL.Path)
		t.FailNow()
	}))
	defer server.Close()
	ctx := installerContext(t, server)
	// simulate the case of GitHub Actions
	ctx = env.Set(ctx, "DATABRICKS_HOST", server.URL)
	ctx = env.Set(ctx, "DATABRICKS_TOKEN", "...")
	ctx = env.Set(ctx, "DATABRICKS_CLUSTER_ID", "installer-cluster")
	ctx = env.Set(ctx, "DATABRICKS_WAREHOUSE_ID", "installer-warehouse")
	// After the installation, we'll have approximately the following state:
	// t.TempDir()
	// └── 001 <------------------------------------------------- env.UserHomeDir(ctx)
	//     ├── .databricks
	//     │   └── labs
	//     │       └── blueprint
	//     │           ├── cache <------------------------------- prj.CacheDir(ctx)
	//     │           │   └── databrickslabs-blueprint-releases.json
	//     │           ├── config
	//     │           ├── lib <--------------------------------- prj.LibDir(ctx)
	//     │           │   ├── install.py
	//     │           │   ├── labs.yml
	//     │           │   ├── main.py
	//     │           │   └── pyproject.toml
	//     │           └── state <------------------------------- prj.StateDir(ctx)
	//     │               ├── venv <---------------------------- prj.virtualEnvPath(ctx)
	//     │               │   ├── bin
	//     │               │   │   ├── pip
	//     │               │   │   ├── ...
	//     │               │   │   ├── python -> python3.9
	//     │               │   │   ├── python3 -> python3.9 <---- prj.virtualEnvPython(ctx)
	//     │               │   │   └── python3.9 -> (path to a detected python)
	//     │               │   ├── include
	//     │               │   ├── lib
	//     │               │   │   └── python3.9
	//     │               │   │       └── site-packages
	//     │               │   │           ├── ...
	//     │               │   │           ├── distutils-precedence.pth
	r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "install", "blueprint")
	r.RunAndExpectOutput("setting up important infrastructure")
}
// TestInstallerWorksForDevelopment runs `labs install .` from a copied
// development checkout against a stubbed workspace: the CLI must log in via
// the generated ~/.databrickscfg, pick compute interactively over stdin,
// and run the install hook.
func TestInstallerWorksForDevelopment(t *testing.T) {
	defer func() {
		if !t.Failed() {
			return
		}
		t.Logf("file tree:\n%s", fileTree{
			Path: filepath.Dir(t.TempDir()),
		})
	}()
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/api/2.0/clusters/list" {
			respondWithJSON(t, w, compute.ListClustersResponse{
				Clusters: []compute.ClusterDetails{
					{
						ClusterId:        "abc-id",
						ClusterName:      "first shared",
						DataSecurityMode: compute.DataSecurityModeUserIsolation,
						SparkVersion:     "12.2.x-whatever",
						State:            compute.StateRunning,
					},
					{
						ClusterId:        "bcd-id",
						ClusterName:      "second personal",
						DataSecurityMode: compute.DataSecurityModeSingleUser,
						SparkVersion:     "14.5.x-whatever",
						State:            compute.StateRunning,
						SingleUserName:   "serge",
					},
				},
			})
			return
		}
		if r.URL.Path == "/api/2.0/preview/scim/v2/Me" {
			respondWithJSON(t, w, iam.User{
				UserName: "serge",
			})
			return
		}
		if r.URL.Path == "/api/2.0/clusters/spark-versions" {
			respondWithJSON(t, w, compute.GetSparkVersionsResponse{
				Versions: []compute.SparkVersion{
					{
						Key:  "14.5.x-whatever",
						Name: "14.5 (Awesome)",
					},
				},
			})
			return
		}
		if r.URL.Path == "/api/2.0/clusters/get" {
			respondWithJSON(t, w, &compute.ClusterDetails{
				State: compute.StateRunning,
			})
			return
		}
		if r.URL.Path == "/api/2.0/sql/warehouses" {
			respondWithJSON(t, w, sql.ListWarehousesResponse{
				Warehouses: []sql.EndpointInfo{
					{
						Id:            "efg-id",
						Name:          "First PRO Warehouse",
						WarehouseType: sql.EndpointInfoWarehouseTypePro,
					},
				},
			})
			return
		}
		// any other request is unexpected and fails the test
		t.Logf("Requested: %s", r.URL.Path)
		t.FailNow()
	}))
	defer server.Close()
	wd, _ := os.Getwd()
	defer os.Chdir(wd)
	devDir := copyTestdata(t, "testdata/installed-in-home/.databricks/labs/blueprint/lib")
	err := os.Chdir(devDir)
	require.NoError(t, err)
	ctx := installerContext(t, server)
	py, _ := python.DetectExecutable(ctx)
	py, _ = filepath.Abs(py)
	// development installer assumes it's in the active virtualenv
	ctx = env.Set(ctx, "PYTHON_BIN", py)
	err = os.WriteFile(filepath.Join(env.UserHomeDir(ctx), ".databrickscfg"), []byte(fmt.Sprintf(`
[profile-one]
host = %s
token = ...
[acc]
host = %s
account_id = abc
`, server.URL, server.URL)), ownerRW)
	require.NoError(t, err)
	// We have the following state at this point:
	// t.TempDir()
	// ├── 001 <------------------ $CWD, prj.EffectiveLibDir(ctx), prj.folder
	// │   ├── install.py
	// │   ├── labs.yml <--------- prj.IsDeveloperMode(ctx) == true
	// │   ├── main.py
	// │   └── pyproject.toml
	// └── 002 <------------------ env.UserHomeDir(ctx)
	//     └── .databricks
	//         └── labs
	//             └── blueprint <--- project.PathInLabs(ctx, "blueprint"), prj.rootDir(ctx)
	//                 └── cache <--- prj.CacheDir(ctx)
	//                     └── databrickslabs-blueprint-releases.json
	// `databricks labs install .` means "verify this installer i'm developing does work"
	r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "install", ".")
	r.WithStdin()
	defer r.CloseStdin()
	r.RunBackground()
	r.WaitForTextPrinted("setting up important infrastructure", 5*time.Second)
}
// TestUpgraderWorksForReleases upgrades an already-installed blueprint
// project to a faked v0.4.0 release and asserts the install hook re-runs.
func TestUpgraderWorksForReleases(t *testing.T) {
	defer func() {
		if !t.Failed() {
			return
		}
		t.Logf("file tree:\n%s", fileTree{
			Path: filepath.Dir(t.TempDir()),
		})
	}()
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path == "/databrickslabs/blueprint/v0.4.0/labs.yml" {
			raw, err := os.ReadFile("testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml")
			if err != nil {
				panic(err)
			}
			w.Write(raw)
			return
		}
		if r.URL.Path == "/repos/databrickslabs/blueprint/zipball/v0.4.0" {
			raw, err := zipballFromFolder("testdata/installed-in-home/.databricks/labs/blueprint/lib")
			if err != nil {
				panic(err)
			}
			w.Header().Add("Content-Type", "application/octet-stream")
			w.Write(raw)
			return
		}
		if r.URL.Path == "/api/2.0/clusters/get" {
			respondWithJSON(t, w, &compute.ClusterDetails{
				State: compute.StateRunning,
			})
			return
		}
		// any other request is unexpected and fails the test
		t.Logf("Requested: %s", r.URL.Path)
		t.FailNow()
	}))
	defer server.Close()
	ctx := installerContext(t, server)
	newHome := copyTestdata(t, "testdata/installed-in-home")
	ctx = env.WithUserHomeDir(ctx, newHome)
	py, _ := python.DetectExecutable(ctx)
	py, _ = filepath.Abs(py)
	ctx = env.Set(ctx, "PYTHON_BIN", py)
	// re-seed the release cache with v0.4.0 so the upgrade sees it as newest
	cachePath := project.PathInLabs(ctx, "blueprint", "cache")
	bs := []byte(`{"refreshed_at": "2033-01-01T00:00:00.92857+02:00","data": [{"tag_name": "v0.4.0"}]}`)
	err := os.WriteFile(filepath.Join(cachePath, "databrickslabs-blueprint-releases.json"), bs, ownerRW)
	require.NoError(t, err)
	// simulate the case of GitHub Actions
	ctx = env.Set(ctx, "DATABRICKS_HOST", server.URL)
	ctx = env.Set(ctx, "DATABRICKS_TOKEN", "...")
	ctx = env.Set(ctx, "DATABRICKS_CLUSTER_ID", "installer-cluster")
	ctx = env.Set(ctx, "DATABRICKS_WAREHOUSE_ID", "installer-warehouse")
	r := internal.NewCobraTestRunnerWithContext(t, ctx, "labs", "upgrade", "blueprint")
	r.RunAndExpectOutput("setting up important infrastructure")
}

117
cmd/labs/project/login.go Normal file
View File

@ -0,0 +1,117 @@
package project
import (
"context"
"encoding/json"
"fmt"
"os"
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/databrickscfg/cfgpickers"
"github.com/databricks/cli/libs/log"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/config"
)
// loginConfig is the persisted selection of profiles and compute for a
// project, stored as the project's login.json.
type loginConfig struct {
	*Entrypoint `json:"-"`
	WorkspaceProfile string `json:"workspace_profile,omitempty"`
	AccountProfile string `json:"account_profile,omitempty"`
	ClusterID string `json:"cluster_id,omitempty"`
	WarehouseID string `json:"warehouse_id,omitempty"`
}
// askWorkspace collects workspace-level login details: the workspace
// profile and, as required by the project, a cluster and a warehouse.
// Account-level configurations are a no-op and return (nil, nil).
func (lc *loginConfig) askWorkspace(ctx context.Context, cfg *config.Config) (*databricks.WorkspaceClient, error) {
	if cfg.IsAccountClient() {
		return nil, nil
	}
	err := lc.askWorkspaceProfile(ctx, cfg)
	if err != nil {
		return nil, fmt.Errorf("profile: %w", err)
	}
	w, err := databricks.NewWorkspaceClient((*databricks.Config)(cfg))
	if err != nil {
		return nil, fmt.Errorf("client: %w", err)
	}
	err = lc.askCluster(ctx, w)
	if err != nil {
		return nil, fmt.Errorf("cluster: %w", err)
	}
	err = lc.askWarehouse(ctx, w)
	if err != nil {
		return nil, fmt.Errorf("warehouse: %w", err)
	}
	return w, nil
}
// askWorkspaceProfile records the workspace profile from cfg when present;
// otherwise it prompts interactively and writes the choice back into cfg.
// Returns ErrNotInTTY when a prompt would be needed non-interactively.
func (lc *loginConfig) askWorkspaceProfile(ctx context.Context, cfg *config.Config) (err error) {
	if cfg.Profile != "" {
		lc.WorkspaceProfile = cfg.Profile
		return
	}
	if !cmdio.IsInteractive(ctx) {
		return ErrNotInTTY
	}
	lc.WorkspaceProfile, err = root.AskForWorkspaceProfile(ctx)
	cfg.Profile = lc.WorkspaceProfile
	return
}
// askCluster records the cluster to use when the project needs one: from
// the client config when set, otherwise via an interactive picker filtered
// for Databricks Connect compatibility. The selection is written back into
// the workspace client config.
func (lc *loginConfig) askCluster(ctx context.Context, w *databricks.WorkspaceClient) (err error) {
	if !lc.NeedsCluster() {
		return
	}
	if w.Config.ClusterID != "" {
		lc.ClusterID = w.Config.ClusterID
		return
	}
	if !cmdio.IsInteractive(ctx) {
		return ErrNotInTTY
	}
	clusterID, err := cfgpickers.AskForCluster(ctx, w,
		cfgpickers.WithDatabricksConnect(lc.Installer.MinRuntimeVersion))
	if err != nil {
		return fmt.Errorf("select: %w", err)
	}
	w.Config.ClusterID = clusterID
	lc.ClusterID = clusterID
	return
}
// askWarehouse records the SQL warehouse to use when the project needs one:
// from the client config when set, otherwise via an interactive picker
// limited to the project's allowed warehouse types.
func (lc *loginConfig) askWarehouse(ctx context.Context, w *databricks.WorkspaceClient) (err error) {
	if !lc.NeedsWarehouse() {
		return
	}
	if w.Config.WarehouseID != "" {
		lc.WarehouseID = w.Config.WarehouseID
		return
	}
	if !cmdio.IsInteractive(ctx) {
		return ErrNotInTTY
	}
	lc.WarehouseID, err = cfgpickers.AskForWarehouse(ctx, w,
		cfgpickers.WithWarehouseTypes(lc.Installer.WarehouseTypes...))
	return
}
// askAccountProfile interactively selects an account profile, but only for
// projects that expose account-level commands.
func (lc *loginConfig) askAccountProfile(ctx context.Context, cfg *config.Config) error {
	if !lc.HasAccountLevelCommands() {
		return nil
	}
	if !cmdio.IsInteractive(ctx) {
		return ErrNotInTTY
	}
	profile, err := root.AskForAccountProfile(ctx)
	lc.AccountProfile = profile
	return err
}
// save writes the login configuration as indented JSON to the project's
// login file with owner-only read/write permissions.
func (lc *loginConfig) save(ctx context.Context) error {
	authFile := lc.loginFile(ctx)
	raw, err := json.MarshalIndent(lc, "", " ")
	if err != nil {
		return err
	}
	log.Debugf(ctx, "Writing auth configuration to: %s", authFile)
	return os.WriteFile(authFile, raw, ownerRW)
}

352
cmd/labs/project/project.go Normal file
View File

@ -0,0 +1,352 @@
package project
import (
"context"
"encoding/json"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"time"
"github.com/databricks/cli/cmd/labs/github"
"github.com/databricks/cli/libs/env"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/python"
"github.com/databricks/databricks-sdk-go/logger"
"github.com/fatih/color"
"gopkg.in/yaml.v3"
"github.com/spf13/cobra"
)
const ownerRW = 0o600
// Load reads and parses a labs.yml file, remembering the directory it was
// loaded from as the project folder.
func Load(ctx context.Context, labsYml string) (*Project, error) {
	raw, err := os.ReadFile(labsYml)
	if err != nil {
		return nil, fmt.Errorf("read labs.yml: %w", err)
	}
	prj, err := readFromBytes(ctx, raw)
	if err != nil {
		return nil, err
	}
	prj.folder = filepath.Dir(labsYml)
	return prj, nil
}
// readFromBytes parses a raw labs.yml document and wires the shared meta
// entrypoint into the install and uninstall hooks.
func readFromBytes(ctx context.Context, labsYmlRaw []byte) (*Project, error) {
	var prj Project
	if err := yaml.Unmarshal(labsYmlRaw, &prj); err != nil {
		return nil, fmt.Errorf("parse labs.yml: %w", err)
	}
	meta := prj.metaEntrypoint(ctx)
	if prj.Installer != nil {
		prj.Installer.Entrypoint = meta
	}
	if prj.Uninstaller != nil {
		prj.Uninstaller.Entrypoint = meta
	}
	return &prj, nil
}
// Project is the parsed representation of a labs.yml project definition.
type Project struct {
	SpecVersion int `yaml:"$version"`
	Name string `yaml:"name"`
	Description string `yaml:"description"`
	Installer *hook `yaml:"install,omitempty"`
	Uninstaller *hook `yaml:"uninstall,omitempty"`
	// Main is the project entrypoint script.
	Main string `yaml:"entrypoint"`
	MinPython string `yaml:"min_python"`
	Commands []*proxy `yaml:"commands,omitempty"`
	// folder is where labs.yml was loaded from: a dev checkout or the
	// installed lib directory.
	folder string
}
// IsZipball reports whether the project is distributed as a GitHub zipball.
func (p *Project) IsZipball() bool {
	// the simplest way of running the project - download ZIP file from github
	return true
}
// HasPython reports whether any part of the project — entrypoint, install
// or uninstall hook, or an explicit minimum Python version — implies a
// Python runtime.
func (p *Project) HasPython() bool {
	switch {
	case strings.HasSuffix(p.Main, ".py"):
		return true
	case p.Installer != nil && p.Installer.HasPython():
		return true
	case p.Uninstaller != nil && p.Uninstaller.HasPython():
		return true
	default:
		return p.MinPython != ""
	}
}
// metaEntrypoint builds a synthetic entrypoint carrying project-wide
// requirements, e.g. whether anything needs a running cluster.
func (p *Project) metaEntrypoint(ctx context.Context) *Entrypoint {
	return &Entrypoint{
		Project: p,
		RequireRunningCluster: p.requireRunningCluster(),
	}
}
// requireRunningCluster reports whether the install hook or any declared
// command needs a running cluster.
func (p *Project) requireRunningCluster() bool {
	needed := p.Installer != nil && p.Installer.RequireRunningCluster()
	for _, c := range p.Commands {
		needed = needed || c.RequireRunningCluster
	}
	return needed
}
// fileExists reports whether name can be stat'ed; any stat error is treated
// as "does not exist".
func (p *Project) fileExists(name string) bool {
	_, err := os.Stat(name)
	return err == nil
}
// projectFilePath resolves name relative to the effective library dir.
func (p *Project) projectFilePath(ctx context.Context, name string) string {
	return filepath.Join(p.EffectiveLibDir(ctx), name)
}
// IsPythonProject reports whether the effective library directory contains
// a setup.py or pyproject.toml marker file.
func (p *Project) IsPythonProject(ctx context.Context) bool {
	for _, marker := range []string{"setup.py", "pyproject.toml"} {
		if p.fileExists(p.projectFilePath(ctx, marker)) {
			return true
		}
	}
	return false
}
// IsDeveloperMode reports whether the project was loaded from a local
// checkout rather than from its installed lib directory.
func (p *Project) IsDeveloperMode(ctx context.Context) bool {
	return p.folder != "" && !strings.HasPrefix(p.LibDir(ctx), p.folder)
}
// HasFolder reports whether the project knows where it was loaded from.
func (p *Project) HasFolder() bool {
	return p.folder != ""
}
// HasAccountLevelCommands reports whether any declared command operates at
// the account level.
func (p *Project) HasAccountLevelCommands() bool {
	for _, c := range p.Commands {
		if c.IsAccountLevel {
			return true
		}
	}
	return false
}
// IsBundleAware reports whether any declared command understands bundles.
func (p *Project) IsBundleAware() bool {
	for _, c := range p.Commands {
		if c.IsBundleAware {
			return true
		}
	}
	return false
}
// Register mounts the project's commands as a named sub-group of the given
// parent command under the "labs" group.
func (p *Project) Register(parent *cobra.Command) {
	group := &cobra.Command{
		Use: p.Name,
		Short: p.Description,
		GroupID: "labs",
	}
	parent.AddCommand(group)
	for _, cp := range p.Commands {
		cp.register(group)
		// wire the owning project into each command's entrypoint
		cp.Entrypoint.Project = p
	}
}
// rootDir is the installation root: ~/.databricks/labs/<name>.
func (p *Project) rootDir(ctx context.Context) string {
	return PathInLabs(ctx, p.Name)
}
// CacheDir holds downloaded metadata, e.g. the GitHub release cache.
func (p *Project) CacheDir(ctx context.Context) string {
	return filepath.Join(p.rootDir(ctx), "cache")
}
// ConfigDir holds persisted configuration, e.g. login.json.
func (p *Project) ConfigDir(ctx context.Context) string {
	return filepath.Join(p.rootDir(ctx), "config")
}
// LibDir holds the unpacked project library code.
func (p *Project) LibDir(ctx context.Context) string {
	return filepath.Join(p.rootDir(ctx), "lib")
}
// EffectiveLibDir is the directory the project code actually runs from:
// the local checkout in developer mode, the installed lib dir otherwise.
func (p *Project) EffectiveLibDir(ctx context.Context) string {
	if p.IsDeveloperMode(ctx) {
		// developer is working on a local checkout, that is not inside of installed root
		return p.folder
	}
	return p.LibDir(ctx)
}
// StateDir holds mutable per-installation state, e.g. the venv and the
// version file.
func (p *Project) StateDir(ctx context.Context) string {
	return filepath.Join(p.rootDir(ctx), "state")
}
// EnsureFoldersExist creates the cache, config, lib, and state folders for
// the project if they are missing.
func (p *Project) EnsureFoldersExist(ctx context.Context) error {
	for _, dir := range []string{
		p.CacheDir(ctx),
		p.ConfigDir(ctx),
		p.LibDir(ctx),
		p.StateDir(ctx),
	} {
		if err := os.MkdirAll(dir, ownerRWXworldRX); err != nil {
			return fmt.Errorf("folder %s: %w", dir, err)
		}
	}
	return nil
}
// Uninstall runs the project's uninstall hook, if declared, and then
// removes the entire project root from disk.
func (p *Project) Uninstall(cmd *cobra.Command) error {
	if p.Uninstaller != nil {
		err := p.Uninstaller.runHook(cmd)
		if err != nil {
			return fmt.Errorf("uninstall hook: %w", err)
		}
	}
	ctx := cmd.Context()
	log.Infof(ctx, "Removing project: %s", p.Name)
	return os.RemoveAll(p.rootDir(ctx))
}
// virtualEnvPath locates the Python virtual environment to use: in
// developer mode an activated ($VIRTUAL_ENV) or detected venv next to the
// checkout; otherwise the "venv" folder inside the project state dir.
func (p *Project) virtualEnvPath(ctx context.Context) string {
	if p.IsDeveloperMode(ctx) {
		// When a virtual environment has been activated, the VIRTUAL_ENV environment variable
		// is set to the path of the environment. Since explicitly activating a virtual environment
		// is not required to use it, VIRTUAL_ENV cannot be relied upon to determine whether a virtual
		// environment is being used.
		//
		// See https://docs.python.org/3/library/venv.html#how-venvs-work
		activatedVenv := env.Get(ctx, "VIRTUAL_ENV")
		if activatedVenv != "" {
			logger.Debugf(ctx, "(development mode) using active virtual environment from: %s", activatedVenv)
			return activatedVenv
		}
		nonActivatedVenv, err := python.DetectVirtualEnvPath(p.EffectiveLibDir(ctx))
		if err == nil {
			logger.Debugf(ctx, "(development mode) using virtual environment from: %s", nonActivatedVenv)
			return nonActivatedVenv
		}
	}
	// by default, we pick Virtual Environment from DATABRICKS_LABS_STATE_DIR
	return filepath.Join(p.StateDir(ctx), "venv")
}
// virtualEnvPython returns the path of the Python interpreter inside
// the project's virtual environment, honoring the PYTHON_BIN override.
func (p *Project) virtualEnvPython(ctx context.Context) string {
	if override := env.Get(ctx, "PYTHON_BIN"); override != "" {
		return override
	}
	venv := p.virtualEnvPath(ctx)
	if runtime.GOOS == "windows" {
		return filepath.Join(venv, "Scripts", "python.exe")
	}
	return filepath.Join(venv, "bin", "python3")
}
// loginFile is the path of the persisted login configuration for this
// project.
func (p *Project) loginFile(ctx context.Context) string {
	if !p.IsDeveloperMode(ctx) {
		return filepath.Join(p.ConfigDir(ctx), "login.json")
	}
	// developers may not want to pollute the state in
	// ~/.databricks/labs/X/config while the version is not yet
	// released
	return p.projectFilePath(ctx, ".databricks-login.json")
}
// loadLoginConfig reads the persisted login configuration from disk
// and attaches the project's meta entrypoint to it.
func (p *Project) loadLoginConfig(ctx context.Context) (*loginConfig, error) {
	path := p.loginFile(ctx)
	log.Debugf(ctx, "Loading login configuration from: %s", path)
	lc, err := tryLoadAndParseJSON[loginConfig](path)
	if err != nil {
		return nil, fmt.Errorf("try load: %w", err)
	}
	lc.Entrypoint = p.metaEntrypoint(ctx)
	return lc, nil
}
// versionFile is the path of the JSON file recording the installed
// version of the project.
func (p *Project) versionFile(ctx context.Context) string {
	state := p.StateDir(ctx)
	return filepath.Join(state, "version.json")
}
// InstalledVersion returns the version information recorded at install
// time. Developer-mode checkouts report the wildcard version "*"
// stamped with the current time.
func (p *Project) InstalledVersion(ctx context.Context) (*version, error) {
	if p.IsDeveloperMode(ctx) {
		return &version{Version: "*", Date: time.Now()}, nil
	}
	path := p.versionFile(ctx)
	log.Debugf(ctx, "Loading installed version info from: %s", path)
	return tryLoadAndParseJSON[version](path)
}
// writeVersionFile records the installed version and the time of
// installation in the project's state folder.
func (p *Project) writeVersionFile(ctx context.Context, ver string) error {
	target := p.versionFile(ctx)
	raw, err := json.Marshal(version{Version: ver, Date: time.Now()})
	if err != nil {
		return err
	}
	log.Debugf(ctx, "Writing installed version info to: %s", target)
	return os.WriteFile(target, raw, ownerRW)
}
// checkUpdates is called before every command of an installed project,
// giving users hints when they need to update their installations.
//
// It compares the version recorded at install time against the newest
// release known to the (cached) GitHub release list and prints an
// upgrade advisory to stderr when they differ.
func (p *Project) checkUpdates(cmd *cobra.Command) error {
	ctx := cmd.Context()
	if p.IsDeveloperMode(ctx) {
		// skipping update check for projects in developer mode, that
		// might not be installed yet
		return nil
	}
	r := github.NewReleaseCache("databrickslabs", p.Name, p.CacheDir(ctx))
	versions, err := r.Load(ctx)
	if err != nil {
		return err
	}
	if len(versions) == 0 {
		// no releases published yet; nothing to compare against
		// (also guards against an index-out-of-range panic below)
		return nil
	}
	installed, err := p.InstalledVersion(ctx)
	if err != nil {
		return err
	}
	latest := versions[0]
	if installed.Version == latest.Version {
		return nil
	}
	ago := time.Since(latest.PublishedAt)
	msg := "[UPGRADE ADVISED] Newer %s version was released %s ago. Please run `databricks labs upgrade %s` to upgrade: %s -> %s"
	cmd.PrintErrln(color.YellowString(msg, p.Name, p.timeAgo(ago), p.Name, installed.Version, latest.Version))
	return nil
}
// timeAgo renders an approximate human-readable duration, e.g.
// "minute", "5 minutes", "3 hours", or "2 days".
func (p *Project) timeAgo(dur time.Duration) string {
	switch {
	case dur < time.Minute:
		return "minute"
	case dur < time.Hour:
		return fmt.Sprintf("%d minutes", int(dur.Minutes())%60)
	case dur < 24*time.Hour:
		return fmt.Sprintf("%d hours", int(dur.Hours())%24)
	default:
		return fmt.Sprintf("%d days", int(dur.Hours())/24)
	}
}
// profileOverride returns the value of the --profile flag, or an empty
// string when the flag is not defined on this command.
func (p *Project) profileOverride(cmd *cobra.Command) string {
	if f := cmd.Flag("profile"); f != nil {
		return f.Value.String()
	}
	return ""
}
// version describes an installed project release: the release tag and
// the time it was recorded in the project's state folder.
type version struct {
	Version string    `json:"version"`
	Date    time.Time `json:"date"`
}

View File

@ -0,0 +1,22 @@
package project
import (
"context"
"os"
"strings"
"testing"
"github.com/stretchr/testify/assert"
)
// assertEqualPaths compares a slash-separated expected path against an
// actual path, normalizing separators for the current OS first.
func assertEqualPaths(t *testing.T, expected, actual string) {
	normalized := strings.ReplaceAll(expected, "/", string(os.PathSeparator))
	assert.Equal(t, normalized, actual)
}
// TestLoad verifies that a labs.yml manifest loads successfully and
// that the project folder is derived from the manifest's location.
func TestLoad(t *testing.T) {
	ctx := context.Background()
	prj, err := Load(ctx, "testdata/installed-in-home/.databricks/labs/blueprint/lib/labs.yml")
	assert.NoError(t, err)
	assertEqualPaths(t, "testdata/installed-in-home/.databricks/labs/blueprint/lib", prj.folder)
}

146
cmd/labs/project/proxy.go Normal file
View File

@ -0,0 +1,146 @@
package project
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io/fs"
"path/filepath"
"strings"
"github.com/databricks/cli/libs/cmdio"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/process"
"github.com/spf13/cobra"
"github.com/spf13/pflag"
)
// proxy is a command declared in a project's labs.yml that is
// forwarded to the project's entrypoint as a subprocess.
type proxy struct {
	Entrypoint `yaml:",inline"`
	// Name of the subcommand as exposed in the CLI.
	Name string `yaml:"name"`
	// Description shown in help output.
	Description string `yaml:"description"`
	// TableTemplate, when set, renders the JSON emitted by the
	// subprocess through this template instead of passing it through.
	TableTemplate string `yaml:"table_template,omitempty"`
	// Flags declared by the project for this command.
	Flags []flag `yaml:"flags,omitempty"`
}
// register adds this proxied command and its declared flags to the
// parent cobra command group.
func (cp *proxy) register(parent *cobra.Command) {
	cmd := &cobra.Command{
		Use:   cp.Name,
		Short: cp.Description,
		RunE:  cp.runE,
	}
	parent.AddCommand(cmd)
	fs := cmd.Flags()
	// f instead of `flag` to avoid shadowing the flag type
	for _, f := range cp.Flags {
		f.register(fs)
	}
}
// runE forwards the invocation to the installed project's entrypoint
// subprocess, wiring stdin/stdout/stderr and the prepared environment.
// Commands with a table template instead capture stdout and render it.
func (cp *proxy) runE(cmd *cobra.Command, _ []string) error {
	if err := cp.checkUpdates(cmd); err != nil {
		return err
	}
	args, err := cp.commandInput(cmd)
	if err != nil {
		return err
	}
	envs, err := cp.Prepare(cmd)
	if err != nil {
		return fmt.Errorf("entrypoint: %w", err)
	}
	ctx := cmd.Context()
	log.Debugf(ctx, "Forwarding subprocess: %s", strings.Join(args, " "))
	if cp.TableTemplate != "" {
		return cp.renderJsonAsTable(cmd, args, envs)
	}
	err = process.Forwarded(ctx, args,
		cmd.InOrStdin(),
		cmd.OutOrStdout(),
		cmd.ErrOrStderr(),
		process.WithEnvs(envs))
	if errors.Is(err, fs.ErrNotExist) && cp.IsPythonProject(ctx) {
		// the venv's interpreter is gone; advise reinstallation
		msg := "cannot find Python %s. Please re-run: databricks labs install %s"
		return fmt.Errorf(msg, cp.MinPython, cp.Name)
	}
	return err
}
// [EXPERIMENTAL] this interface contract may change in the future.
// See https://github.com/databricks/cli/issues/994
//
// renderJsonAsTable runs the subprocess, captures its stdout as JSON,
// and renders the parsed value through the command's table template.
func (cp *proxy) renderJsonAsTable(cmd *cobra.Command, args []string, envs map[string]string) error {
	ctx := cmd.Context()
	var out bytes.Buffer
	if err := process.Forwarded(ctx, args,
		cmd.InOrStdin(),
		&out,
		cmd.ErrOrStderr(),
		process.WithEnvs(envs)); err != nil {
		return err
	}
	var parsed any
	if err := json.Unmarshal(out.Bytes(), &parsed); err != nil {
		return err
	}
	// IntelliJ eagerly replaces tabs with spaces, even though we're not asking for it
	fixedTemplate := strings.ReplaceAll(cp.TableTemplate, "\\t", "\t")
	return cmdio.RenderWithTemplate(ctx, parsed, fixedTemplate)
}
// commandInput builds the argv for the forwarded subprocess: for
// Python projects, the venv interpreter and entrypoint script, followed
// in all cases by a single JSON argument carrying the command name and
// its flag values.
func (cp *proxy) commandInput(cmd *cobra.Command) ([]string, error) {
	ctx := cmd.Context()
	flags := cmd.Flags()
	payload := struct {
		Command    string         `json:"command"`
		Flags      map[string]any `json:"flags"`
		OutputType string         `json:"output_type"`
	}{
		Command: cp.Name,
		Flags:   map[string]any{},
	}
	for _, f := range cp.Flags {
		v, err := f.get(flags)
		if err != nil {
			return nil, fmt.Errorf("get %s flag: %w", f.Name, err)
		}
		payload.Flags[f.Name] = v
	}
	// forward the CLI's log level so the child process logs consistently
	if logLevel := flags.Lookup("log-level"); logLevel != nil {
		payload.Flags["log_level"] = logLevel.Value.String()
	}
	var args []string
	if cp.IsPythonProject(ctx) {
		args = append(args, cp.virtualEnvPython(ctx))
		args = append(args, filepath.Join(cp.EffectiveLibDir(ctx), cp.Main))
	}
	raw, err := json.Marshal(payload)
	if err != nil {
		return nil, fmt.Errorf("command input: %w", err)
	}
	return append(args, string(raw)), nil
}
// flag is a project-declared command flag from labs.yml. All labs
// flags are registered as string flags on the cobra command.
type flag struct {
	// Name of the flag (e.g. --first).
	Name string `yaml:"name"`
	// Description shown in help output.
	Description string `yaml:"description"`
	// Default value, stringified at registration time when set.
	Default any `yaml:"default,omitempty"`
}
// register declares the flag as a string flag on the given flag set,
// stringifying the declared default value when present.
func (f *flag) register(pf *pflag.FlagSet) {
	dflt := ""
	if f.Default != nil {
		dflt = fmt.Sprint(f.Default)
	}
	pf.String(f.Name, dflt, f.Description)
}
// get reads the flag's current value from the flag set. Labs flags are
// always registered as strings, so the value comes back as a string.
func (f *flag) get(pf *pflag.FlagSet) (any, error) {
	return pf.GetString(f.Name)
}

View File

@ -0,0 +1,126 @@
{
"id": "https://raw.githubusercontent.com/databricks/cli/feat/labs/cmd/labs/project/schema.json#",
"$schema": "http://json-schema.org/draft-04/schema",
"definitions": {
"entrypoint": {
"type": "object",
"properties": {
"require_running_cluster": {
"type": "boolean",
"default": false
},
"is_unauthenticated": {
"type": "boolean",
"default": false
},
"is_account_level": {
"type": "boolean",
"default": false
},
"is_bundle_aware": {
"type": "boolean",
"default": false
}
}
},
"hook": {
"type": "object",
"$ref": "#/definitions/entrypoint",
"unevaluatedProperties": true,
"properties": {
"script": {
"type": "string",
"pattern": "^[A-Za-z0-9_-/\\.]+$"
},
"min_runtime_version": {
"type": "string",
"pattern": "^[0-9]+.[0-9]+$"
},
"require_databricks_connect": {
"type": "boolean",
"default": false
},
"warehouse_types": {
"enum": [ "PRO", "CLASSIC", "TYPE_UNSPECIFIED" ]
}
}
},
"alphanum": {
"type": "string",
"pattern": "^[a-z0-9-]$"
},
"command": {
"type": "object",
"$ref": "#/definitions/entrypoint",
"unevaluatedProperties": true,
"required": ["name", "description"],
"properties": {
"name": {
"$ref": "#/definitions/alphanum"
},
"description": {
"type": "string"
},
"table_template": {
"type": "string"
},
"flags": {
"$ref": "#/definitions/flag"
}
}
},
"flag": {
"type": "object",
"required": ["name", "description"],
"properties": {
"name": {
"$ref": "#/definitions/alphanum"
},
"description": {
"type": "string"
},
"default": {}
}
}
},
"type": "object",
"additionalProperties": false,
"required": ["name", "description", "entrypoint"],
"properties": {
"$version": {
"type": "integer",
"default": 1
},
"name": {
"$ref": "#/definitions/alphanum",
"description": "Name of the project"
},
"description": {
"type": "string",
"description": "Short description of the project"
},
"entrypoint": {
"type": "string",
"description": "Script that routes subcommands"
},
"min_python": {
"type": "string",
"pattern": "^3.[0-9]+$",
"description": "Minimal Python version required"
},
"install": {
"$ref": "#/definitions/hook",
"description": "Installation configuration"
},
"uninstall": {
"$ref": "#/definitions/hook"
},
"commands": {
"type": "array",
"description": "Exposed commands",
"items": {
"$ref": "#/definitions/command"
}
}
}
}

1
cmd/labs/project/testdata/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
!.databricks

View File

@ -0,0 +1,8 @@
{
"refreshed_at": "2033-01-01T00:00:00.92857+02:00",
"data": [
{
"tag_name": "v0.3.15"
}
]
}

View File

@ -0,0 +1,4 @@
{
"workspace_profile": "workspace-profile",
"account_profile": "account-profile"
}

View File

@ -0,0 +1 @@
print(f'setting up important infrastructure')

View File

@ -0,0 +1,33 @@
---
version: 1
name: blueprint
description: Blueprint Project
install:
min_runtime_version: 13.1
require_running_cluster: true
warehouse_types:
- PRO
script: install.py
entrypoint: main.py
min_python: 3.9
commands:
- name: echo
is_account_level: true
description: non-interactive echo
flags:
- name: first
default: something
description: first flag description
- name: foo
description: foo command
flags:
- name: first
description: first flag description
- name: second
description: second flag description
- name: table
description: something that renders a table
table_template: |
Key Value
{{range .records}}{{.key}} {{.value}}
{{end}}

View File

@ -0,0 +1,27 @@
# Test fixture entrypoint: receives a single JSON argument describing
# the invoked command and its flags, and emulates a labs project.
import os, sys, json
payload = json.loads(sys.argv[1])
if 'echo' == payload['command']:
    # non-interactive echo: dump the parsed payload plus the full
    # environment as JSON on stdout
    json.dump({
        'command': payload['command'],
        'flags': payload['flags'],
        'env': {k:v for k,v in os.environ.items()}
    }, sys.stdout)
    sys.exit(0)
if 'table' == payload['command']:
    # emit JSON records on stdout and noise on stderr, to exercise
    # table-template rendering that must ignore stderr
    sys.stderr.write("some intermediate info\n")
    json.dump({'records': [
        {'key': 'First', 'value': 'Second'},
        {'key': 'Third', 'value': 'Fourth'},
    ]}, sys.stdout)
    sys.exit(0)
# default path: reads DATABRICKS_HOST from the environment and prompts
# on stdin, exercising interactive forwarding
print(f'host is {os.environ["DATABRICKS_HOST"]}')
print(f'[{payload["command"]}] command flags are {payload["flags"]}')
answer = input('What is your name? ')
print(f'Hello, {answer}!')

View File

@ -0,0 +1,11 @@
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[project]
name = "blueprint"
version = "0.3.15"
description = 'Databricks Labs Blueprint'
requires-python = ">=3.9"
classifiers = ["Programming Language :: Python"]
dependencies = []

View File

@ -0,0 +1,4 @@
{
"version": "v0.3.15",
"date": "2023-10-24T15:04:05+01:00"
}

View File

@ -0,0 +1,37 @@
{
"refreshed_at": "2033-01-01T00:00:00.92857+02:00",
"data": [
{
"name": "blueprint",
"description": "Sample project",
"language": "Python",
"default_branch": "main",
"stargazers_count": 100500,
"fork": false,
"archived": false,
"topics": [],
"html_url": "https://github.com/databrickslabs/blueprint",
"clone_url": "https://github.com/databrickslabs/blueprint.git",
"ssh_url": "git@github.com:databrickslabs/blueprint.git",
"license": {
"name": "Other"
}
},
{
"name": "ucx",
"description": "Unity Catalog Migrations",
"language": "Python",
"default_branch": "main",
"stargazers_count": 100500,
"fork": false,
"archived": false,
"topics": [],
"html_url": "https://github.com/databrickslabs/ucx",
"clone_url": "https://github.com/databrickslabs/ucx.git",
"ssh_url": "git@github.com:databrickslabs/ucx.git",
"license": {
"name": "Other"
}
}
]
}

View File

@ -0,0 +1,9 @@
[workspace-profile]
host = https://abc
token = bcd
cluster_id = cde
warehouse_id = def
[account-profile]
host = https://accounts.cloud.databricks.com
account_id = cde

57
cmd/labs/show.go Normal file
View File

@ -0,0 +1,57 @@
package labs
import (
"fmt"
"github.com/databricks/cli/cmd/labs/project"
"github.com/databricks/cli/libs/cmdio"
"github.com/spf13/cobra"
)
// newShowCommand renders metadata about a single installed project:
// its name, description, whether it is a Python project, and its
// lib/cache/config folders. The special name "." matches a project in
// developer mode.
func newShowCommand() *cobra.Command {
	return &cobra.Command{
		Use:   "show NAME",
		Args:  cobra.ExactArgs(1),
		Short: "Shows information about the project",
		Annotations: map[string]string{
			"template": cmdio.Heredoc(`
			Name: {{.name}}
			Description: {{.description}}
			Python: {{.is_python}}
			Folders:
			- lib: {{.lib_dir}}
			- cache: {{.cache_dir}}
			- config: {{.config_dir}}
			`),
		},
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := cmd.Context()
			installed, err := project.Installed(ctx)
			if err != nil {
				return err
			}
			if len(installed) == 0 {
				return fmt.Errorf("no projects found")
			}
			name := args[0]
			for _, v := range installed {
				isDev := name == "." && v.IsDeveloperMode(ctx)
				isMatch := name == v.Name
				if !(isDev || isMatch) {
					continue
				}
				return cmdio.Render(ctx, map[string]any{
					"name":        v.Name,
					"description": v.Description,
					"cache_dir":   v.CacheDir(ctx),
					"config_dir":  v.ConfigDir(ctx),
					"lib_dir":     v.EffectiveLibDir(ctx),
					"is_python":   v.IsPythonProject(ctx),
				})
			}
			// consistent with `databricks labs uninstall`: an unknown
			// project name is an explicit error, not silent success
			return fmt.Errorf("not found: %s", name)
		},
	}
}

39
cmd/labs/uninstall.go Normal file
View File

@ -0,0 +1,39 @@
package labs
import (
"fmt"
"github.com/databricks/cli/cmd/labs/project"
"github.com/spf13/cobra"
)
// newUninstallCommand removes an installed project after running its
// optional uninstall hook. Installed project names are offered for
// shell completion.
func newUninstallCommand() *cobra.Command {
	return &cobra.Command{
		Use:   "uninstall NAME",
		Args:  cobra.ExactArgs(1),
		Short: "Uninstalls project",
		ValidArgsFunction: func(cmd *cobra.Command, args []string, toComplete string) ([]string, cobra.ShellCompDirective) {
			// best-effort completion: listing errors are ignored
			var names []string
			installed, _ := project.Installed(cmd.Context())
			for _, v := range installed {
				names = append(names, v.Name)
			}
			return names, cobra.ShellCompDirectiveNoFileComp
		},
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := cmd.Context()
			installed, err := project.Installed(ctx)
			if err != nil {
				return err
			}
			name := args[0]
			for _, prj := range installed {
				if prj.Name == name {
					return prj.Uninstall(cmd)
				}
			}
			return fmt.Errorf("not found: %s", name)
		},
	}
}

View File

@ -0,0 +1,64 @@
package unpack
import (
"archive/zip"
"bytes"
"fmt"
"io"
"os"
"path/filepath"
"strings"
)
const ownerRWXworldRX = 0o755
type GitHubZipball struct {
io.Reader
}
func (v GitHubZipball) UnpackTo(libTarget string) error {
raw, err := io.ReadAll(v)
if err != nil {
return err
}
zipReader, err := zip.NewReader(bytes.NewReader(raw), int64(len(raw)))
if err != nil {
return fmt.Errorf("zip: %w", err)
}
// GitHub packages entire repo contents into a top-level folder, e.g. databrickslabs-ucx-2800c6b
rootDirInZIP := zipReader.File[0].FileHeader.Name
for _, zf := range zipReader.File {
if zf.Name == rootDirInZIP {
continue
}
normalizedName := strings.TrimPrefix(zf.Name, rootDirInZIP)
targetName := filepath.Join(libTarget, normalizedName)
if zf.FileInfo().IsDir() {
err = os.MkdirAll(targetName, ownerRWXworldRX)
if err != nil {
return fmt.Errorf("mkdir %s: %w", normalizedName, err)
}
continue
}
err = v.extractFile(zf, targetName)
if err != nil {
return fmt.Errorf("extract %s: %w", zf.Name, err)
}
}
return nil
}
func (v GitHubZipball) extractFile(zf *zip.File, targetName string) error {
reader, err := zf.Open()
if err != nil {
return fmt.Errorf("source: %w", err)
}
defer reader.Close()
writer, err := os.OpenFile(targetName, os.O_CREATE|os.O_RDWR, zf.Mode())
if err != nil {
return fmt.Errorf("target: %w", err)
}
defer writer.Close()
_, err = io.Copy(writer, reader)
return err
}

21
cmd/labs/upgrade.go Normal file
View File

@ -0,0 +1,21 @@
package labs
import (
"github.com/databricks/cli/cmd/labs/project"
"github.com/spf13/cobra"
)
// newUpgradeCommand upgrades an installed labs project to its latest
// released version.
func newUpgradeCommand() *cobra.Command {
	return &cobra.Command{
		Use:   "upgrade NAME",
		Args:  cobra.ExactArgs(1),
		Short: "Upgrades project",
		RunE: func(cmd *cobra.Command, args []string) error {
			upgrader, err := project.NewUpgrader(cmd, args[0])
			if err != nil {
				return err
			}
			return upgrader.Upgrade(cmd.Context())
		},
	}
}