From a686542b1f481222615c389c46ecfb7f14f9c332 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Sat, 5 Aug 2023 18:06:50 +0200 Subject: [PATCH] (PoC) `Databricks Labs` command group Allow for `labs.yml` definition in target repositories: ``` --- name: dbx context: workspace description: Databricks CLI extensions hooks: install: install.py entrypoint: main.py commands: - name: foo description: foo command flags: - name: first description: first flag description - name: second description: second flag description - name: bar description: bar command flags: - name: first description: first flag description - name: second description: second flag description ``` and simple command entry points that are aware of both CLI flags and Unified Authentication env variables: ``` import os, sys, json print(f'host is {os.environ["DATABRICKS_HOST"]}') payload = json.loads(sys.argv[1]) print(f'[{payload["command"]}]: flags are {payload["flags"]}') answer = input('What is your name? ') print(f'got answer: {answer}') answer = input('Preferences? 
') print(f'got answer: {answer}') ``` --- cmd/cmd.go | 2 + cmd/internal/test.go | 21 ++++ cmd/labs/feature/all.go | 30 +++++ cmd/labs/feature/feature.go | 218 ++++++++++++++++++++++++++++++++++ cmd/labs/feature/http_call.go | 29 +++++ cmd/labs/install.go | 24 ++++ cmd/labs/install_test.go | 15 +++ cmd/labs/labs.go | 126 ++++++++++++++++++++ cmd/labs/list.go | 89 ++++++++++++++ 9 files changed, 554 insertions(+) create mode 100644 cmd/internal/test.go create mode 100644 cmd/labs/feature/all.go create mode 100644 cmd/labs/feature/feature.go create mode 100644 cmd/labs/feature/http_call.go create mode 100644 cmd/labs/install.go create mode 100644 cmd/labs/install_test.go create mode 100644 cmd/labs/labs.go create mode 100644 cmd/labs/list.go diff --git a/cmd/cmd.go b/cmd/cmd.go index 04d7cc804..18d3cac16 100644 --- a/cmd/cmd.go +++ b/cmd/cmd.go @@ -7,6 +7,7 @@ import ( "github.com/databricks/cli/cmd/bundle" "github.com/databricks/cli/cmd/configure" "github.com/databricks/cli/cmd/fs" + "github.com/databricks/cli/cmd/labs" "github.com/databricks/cli/cmd/root" "github.com/databricks/cli/cmd/sync" "github.com/databricks/cli/cmd/version" @@ -37,6 +38,7 @@ func New() *cobra.Command { cli.AddCommand(bundle.New()) cli.AddCommand(configure.New()) cli.AddCommand(fs.New()) + cli.AddCommand(labs.New()) cli.AddCommand(sync.New()) cli.AddCommand(version.New()) diff --git a/cmd/internal/test.go b/cmd/internal/test.go new file mode 100644 index 000000000..19fe8b30d --- /dev/null +++ b/cmd/internal/test.go @@ -0,0 +1,21 @@ +package internal + +import ( + "bytes" + "context" + + "github.com/databricks/cli/cmd" +) + +func RunGetOutput(ctx context.Context, args ...string) ([]byte, error) { + root := cmd.New() + args = append(args, "--log-level", "debug") + root.SetArgs(args) + var buf bytes.Buffer + root.SetOut(&buf) + err := root.ExecuteContext(ctx) + if err != nil { + return nil, err + } + return buf.Bytes(), nil +} diff --git a/cmd/labs/feature/all.go b/cmd/labs/feature/all.go new 
file mode 100644 index 000000000..c1cf75743 --- /dev/null +++ b/cmd/labs/feature/all.go @@ -0,0 +1,30 @@ +package feature + +import ( + "context" + "fmt" + "os" + "path/filepath" +) + +func LoadAll(ctx context.Context) (features []*Feature, err error) { + home, err := os.UserHomeDir() + if err != nil { + return nil, err + } + labsDir, err := os.ReadDir(filepath.Join(home, ".databricks", "labs")) + if err != nil { + return nil, err + } + for _, v := range labsDir { + if !v.IsDir() { + continue + } + feature, err := NewFeature(v.Name()) + if err != nil { + return nil, fmt.Errorf("%s: %w", v.Name(), err) + } + features = append(features, feature) + } + return features, nil +} diff --git a/cmd/labs/feature/feature.go b/cmd/labs/feature/feature.go new file mode 100644 index 000000000..3b78e4eca --- /dev/null +++ b/cmd/labs/feature/feature.go @@ -0,0 +1,218 @@ +package feature + +import ( + "bytes" + "context" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/databricks/cli/libs/git" + "github.com/databricks/cli/libs/log" + "golang.org/x/mod/semver" + "gopkg.in/yaml.v2" +) + +type Feature struct { + Name string `json:"name"` + Context string `json:"context,omitempty"` // auth context + Description string `json:"description"` + Hooks struct { + Install string `json:"install,omitempty"` + Uninstall string `json:"uninstall,omitempty"` + } + Entrypoint string `json:"entrypoint"` + Commands []struct { + Name string `json:"name"` + Description string `json:"description"` + Flags []struct { + Name string `json:"name"` + Description string `json:"description"` + } `json:"flags,omitempty"` + } `json:"commands,omitempty"` + + path string + checkout *git.Repository +} + +func NewFeature(name string) (*Feature, error) { + home, err := os.UserHomeDir() + if err != nil { + return nil, err + } + path := filepath.Join(home, ".databricks", "labs", name) + checkout, err := git.NewRepository(path) + if err != nil && !os.IsNotExist(err) { + 
return nil, err + } + feat := &Feature{ + Name: name, + path: path, + checkout: checkout, + } + raw, err := os.ReadFile(filepath.Join(path, "labs.yml")) + if err != nil { + return nil, fmt.Errorf("read labs.yml: %w", err) + } + err = yaml.Unmarshal(raw, feat) + if err != nil { + return nil, fmt.Errorf("parse labs.yml: %w", err) + } + return feat, nil +} + +type release struct { + TagName string `json:"tag_name"` + Draft bool `json:"draft"` + Prerelease bool `json:"prerelease"` + PublishedAt time.Time `json:"published_at"` +} + +func (i *Feature) LatestVersion(ctx context.Context) (*release, error) { + var tags []release + url := fmt.Sprintf("https://api.github.com/repos/databrickslabs/%s/releases", i.Name) + err := httpCall(ctx, url, &tags) + if err != nil { + return nil, err + } + return &tags[0], nil +} + +const CacheDir = ".databricks" + +type pythonInstallation struct { + Version string + Binary string +} + +func (i *Feature) pythonExecutables(ctx context.Context) ([]pythonInstallation, error) { + found := []pythonInstallation{} + paths := strings.Split(os.Getenv("PATH"), string(os.PathListSeparator)) + for _, candidate := range paths { + bin := filepath.Join(candidate, "python3") + _, err := os.Stat(bin) + if err != nil && os.IsNotExist(err) { + continue + } + out, err := i.cmd(ctx, bin, "--version") + if err != nil { + return nil, err + } + words := strings.Split(out, " ") + found = append(found, pythonInstallation{ + Version: words[len(words)-1], + Binary: bin, + }) + } + if len(found) == 0 { + return nil, fmt.Errorf("no python3 executables found") + } + sort.Slice(found, func(i, j int) bool { + a := found[i].Version + b := found[j].Version + cmp := semver.Compare(a, b) + if cmp != 0 { + return cmp < 0 + } + return a < b + }) + return found, nil +} + +func (i *Feature) installVirtualEnv(ctx context.Context) error { + _, err := os.Stat(filepath.Join(i.path, "setup.py")) + if err != nil { + return err + } + pys, err := i.pythonExecutables(ctx) + if err != nil 
{ + return err + } + python3 := pys[0].Binary + log.Debugf(ctx, "Creating python virtual environment in %s/%s", i.path, CacheDir) + _, err = i.cmd(ctx, python3, "-m", "venv", CacheDir) + if err != nil { + return fmt.Errorf("create venv: %w", err) + } + + log.Debugf(ctx, "Installing dependencies from setup.py") + venvPip := filepath.Join(i.path, CacheDir, "bin", "pip") + _, err = i.cmd(ctx, venvPip, "install", ".") + if err != nil { + return fmt.Errorf("pip install: %w", err) + } + return nil +} + +func (i *Feature) Run(ctx context.Context, raw []byte) error { + err := i.installVirtualEnv(ctx) + if err != nil { + return err + } + // TODO: detect virtual env (also create it on installation), + // because here we just assume that virtual env is installed. + python3 := filepath.Join(i.path, CacheDir, "bin", "python") + + // make sure to sync on writing to stdout + reader, writer := io.Pipe() + go io.CopyBuffer(os.Stdout, reader, make([]byte, 128)) + defer reader.Close() + defer writer.Close() + + // pass command parameters down to script as the first arg + cmd := exec.Command(python3, i.Entrypoint, string(raw)) + cmd.Dir = i.path + cmd.Stdout = writer + cmd.Stderr = writer + + stdin, err := cmd.StdinPipe() + if err != nil { + return err + } + go io.CopyBuffer(stdin, os.Stdin, make([]byte, 128)) + defer stdin.Close() + + err = cmd.Start() + if err != nil { + return err + } + + return cmd.Wait() +} + +func (i *Feature) cmd(ctx context.Context, args ...string) (string, error) { + commandStr := strings.Join(args, " ") + log.Debugf(ctx, "running: %s", commandStr) + cmd := exec.Command(args[0], args[1:]...) 
// httpCall issues a GET request to url and decodes the JSON response body
// into response, which must be a pointer accepted by json.Unmarshal.
//
// The request is bound to ctx, so cancelling ctx aborts it. Responses with a
// status code of 400 or above are reported as an error carrying the status
// line. The response body is always closed before returning.
func httpCall(ctx context.Context, url string, response any) error {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return err
	}
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	// Close on every path, including the error-status return below.
	defer res.Body.Close()

	if res.StatusCode >= 400 {
		return fmt.Errorf("github request failed: %s", res.Status)
	}
	raw, err := io.ReadAll(res.Body)
	if err != nil {
		return err
	}
	return json.Unmarshal(raw, response)
}
*cobra.Command, args []string) error { + ctx := cmd.Context() + state, err := feature.NewFeature(args[0]) + if err != nil { + return err + } + return state.Install(ctx) + }, + } +} diff --git a/cmd/labs/install_test.go b/cmd/labs/install_test.go new file mode 100644 index 000000000..7b951186b --- /dev/null +++ b/cmd/labs/install_test.go @@ -0,0 +1,15 @@ +package labs_test + +import ( + "context" + "testing" + + "github.com/databricks/cli/cmd/internal" + "github.com/stretchr/testify/assert" +) + +func TestInstallDbx(t *testing.T) { + ctx := context.Background() + _, err := internal.RunGetOutput(ctx, "labs", "install", "dbx") + assert.NoError(t, err) +} diff --git a/cmd/labs/labs.go b/cmd/labs/labs.go new file mode 100644 index 000000000..be7a33df1 --- /dev/null +++ b/cmd/labs/labs.go @@ -0,0 +1,126 @@ +package labs + +import ( + "encoding/json" + "fmt" + "os" + + "github.com/databricks/cli/cmd/labs/feature" + "github.com/databricks/cli/cmd/root" + "github.com/databricks/databricks-sdk-go/config" + "github.com/spf13/cobra" +) + +func New() *cobra.Command { + cmd := &cobra.Command{ + Use: "labs", + Short: "Databricks Labs features", + Long: `Manage experimental Databricks Labs apps`, + } + + // TODO: this should be on the top CLI level + cmd.AddGroup(&cobra.Group{ + ID: "labs", + Title: "Databricks Labs", + }) + + cmd.AddCommand( + newListCommand(), + newInstallCommand(), + &cobra.Command{ + Use: "py", + Short: "...", + RunE: func(cmd *cobra.Command, args []string) error { + return nil + }, + }, + ) + + err := infuse(cmd) + if err != nil { + panic(err) + } + + return cmd +} + +type commandInput struct { + Command string `json:"command"` + Flags map[string]any `json:"flags"` + OutputType string `json:"output_type"` +} + +func infuse(cmd *cobra.Command) error { + ctx := cmd.Context() + all, err := feature.LoadAll(ctx) + if err != nil { + return err + } + for _, f := range all { + group := &cobra.Command{ + Use: f.Name, + Short: f.Description, + GroupID: "labs", + } + 
cmd.AddCommand(group) + for _, v := range f.Commands { + l := v + definedFlags := v.Flags + vcmd := &cobra.Command{ + Use: v.Name, + Short: v.Description, + RunE: func(cmd *cobra.Command, args []string) error { + flags := cmd.Flags() + if f.Context == "workspace" { + // TODO: context can be on both command and feature level + err = root.MustWorkspaceClient(cmd, args) + if err != nil { + return err + } + // TODO: add account-level init as well + w := root.WorkspaceClient(cmd.Context()) + for _, a := range config.ConfigAttributes { + if a.IsZero(w.Config) { + continue + } + for _, ev := range a.EnvVars { + err = os.Setenv(ev, a.GetString(w.Config)) + if err != nil { + return fmt.Errorf("set %s: %w", a.Name, err) + } + } + } + } + ci := &commandInput{ + Command: l.Name, + Flags: map[string]any{}, + } + for _, flag := range definedFlags { + v, err := flags.GetString(flag.Name) + if err != nil { + return fmt.Errorf("get %s flag: %w", flag.Name, err) + } + ci.Flags[flag.Name] = v + } + logLevelFlag := flags.Lookup("log-level") + if logLevelFlag != nil { + ci.Flags["log_level"] = logLevelFlag.Value.String() + } + raw, err := json.Marshal(ci) + if err != nil { + return err + } + ctx := cmd.Context() + // actually execute the command + return f.Run(ctx, raw) + }, + } + flags := vcmd.Flags() + for _, flag := range definedFlags { + flags.String(flag.Name, "", flag.Description) + } + group.AddCommand(vcmd) + } + } + return nil +} diff --git a/cmd/labs/list.go b/cmd/labs/list.go new file mode 100644 index 000000000..c8ea1455d --- /dev/null +++ b/cmd/labs/list.go @@ -0,0 +1,89 @@ +package labs + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + + "github.com/databricks/cli/libs/cmdio" + "github.com/spf13/cobra" +) + +type labsMeta struct { + Name string `json:"name"` + Description string `json:"description"` + License string `json:"license"` +} + +func httpCall(ctx context.Context, url string, response any) error { + res, err := http.Get(url) + if err != nil 
{ + return err + } + if res.StatusCode >= 400 { + return fmt.Errorf("github request failed: %s", res.Status) + } + defer res.Body.Close() + raw, err := io.ReadAll(res.Body) + if err != nil { + return err + } + err = json.Unmarshal(raw, response) + if err != nil { + return err + } + return nil +} + +func newListCommand() *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "List all labs", + Annotations: map[string]string{ + "template": cmdio.Heredoc(` + Name Description + {{range .}}{{.Name}} {{.Description}} + {{end}} + `), + }, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + var repositories []struct { + Name string `json:"name"` + Description string `json:"description"` + Fork bool `json:"fork"` + Arcived bool `json:"archived"` + License struct { + Name string `json:"name"` + } `json:"license"` + } + err := httpCall(ctx, + "https://api.github.com/users/databrickslabs/repos", + &repositories) + if err != nil { + return err + } + info := []labsMeta{} + for _, v := range repositories { + if v.Arcived { + continue + } + if v.Fork { + continue + } + description := v.Description + if len(description) > 50 { + description = description[:50] + "..." + } + info = append(info, labsMeta{ + Name: v.Name, + Description: description, + License: v.License.Name, + }) + } + return cmdio.Render(ctx, info) + }, + } +}