(PoC) `Databricks Labs` command group

Allow for `labs.yml` definition in target repositories:

```
---
name: dbx
context: workspace
description: Databricks CLI extensions
hooks:
  install: install.py
entrypoint: main.py
commands:
  - name: foo
    description: foo command
    flags:
      - name: first
        description: first flag description
      - name: second
        description: second flag description
  - name: bar
    description: bar command
    flags:
      - name: first
        description: first flag description
      - name: second
        description: second flag description
```

and simple command entry points that are aware of both CLI flags and Unified Authentication env variables:

```
import os, sys, json

print(f'host is {os.environ["DATABRICKS_HOST"]}')

payload = json.loads(sys.argv[1])
print(f'[{payload["command"]}]: flags are {payload["flags"]}')

answer = input('What is your name? ')

print(f'got answer: {answer}')

answer = input('Preferences? ')

print(f'got answer: {answer}')
```
This commit is contained in:
Serge Smertin 2023-08-05 18:06:50 +02:00
parent ce9c9148c9
commit a686542b1f
No known key found for this signature in database
GPG Key ID: 92A95A66446BCE3F
9 changed files with 554 additions and 0 deletions

View File

@ -7,6 +7,7 @@ import (
"github.com/databricks/cli/cmd/bundle"
"github.com/databricks/cli/cmd/configure"
"github.com/databricks/cli/cmd/fs"
"github.com/databricks/cli/cmd/labs"
"github.com/databricks/cli/cmd/root"
"github.com/databricks/cli/cmd/sync"
"github.com/databricks/cli/cmd/version"
@ -37,6 +38,7 @@ func New() *cobra.Command {
cli.AddCommand(bundle.New())
cli.AddCommand(configure.New())
cli.AddCommand(fs.New())
cli.AddCommand(labs.New())
cli.AddCommand(sync.New())
cli.AddCommand(version.New())

21
cmd/internal/test.go Normal file
View File

@ -0,0 +1,21 @@
package internal
import (
"bytes"
"context"
"github.com/databricks/cli/cmd"
)
// RunGetOutput executes the root CLI command with the given arguments, with
// "--log-level debug" appended, and returns everything written to the
// command's output writer. On failure the captured output is discarded and
// only the error is returned.
func RunGetOutput(ctx context.Context, args ...string) ([]byte, error) {
	var captured bytes.Buffer

	cli := cmd.New()
	cli.SetOut(&captured)
	cli.SetArgs(append(args, "--log-level", "debug"))

	if err := cli.ExecuteContext(ctx); err != nil {
		return nil, err
	}
	return captured.Bytes(), nil
}

30
cmd/labs/feature/all.go Normal file
View File

@ -0,0 +1,30 @@
package feature
import (
"context"
"fmt"
"os"
"path/filepath"
)
// LoadAll returns every feature found under ~/.databricks/labs.
//
// Each sub-directory is loaded through NewFeature; plain files are ignored.
// A missing labs directory simply means that nothing has been installed yet,
// so it yields an empty result instead of an error. Any other failure to read
// the directory is returned as is, and a feature that fails to load aborts
// the scan with an error prefixed by its directory name.
//
// ctx is currently unused.
func LoadAll(ctx context.Context) (features []*Feature, err error) {
	home, err := os.UserHomeDir()
	if err != nil {
		return nil, err
	}
	entries, err := os.ReadDir(filepath.Join(home, ".databricks", "labs"))
	if os.IsNotExist(err) {
		// Fresh machine: no labs directory, hence no installed features.
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}
		feat, err := NewFeature(entry.Name())
		if err != nil {
			return nil, fmt.Errorf("%s: %w", entry.Name(), err)
		}
		features = append(features, feat)
	}
	return features, nil
}

218
cmd/labs/feature/feature.go Normal file
View File

@ -0,0 +1,218 @@
package feature
import (
"bytes"
"context"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"sort"
"strings"
"time"
"github.com/databricks/cli/libs/git"
"github.com/databricks/cli/libs/log"
"golang.org/x/mod/semver"
"gopkg.in/yaml.v2"
)
// Feature describes a Databricks Labs project located under
// ~/.databricks/labs/<name>, as declared by the labs.yml file in its root.
//
// The struct is decoded with a YAML decoder, so every exported field carries
// an explicit yaml tag; the keys are the same lowercase names the decoder
// would otherwise derive from the field names by default.
type Feature struct {
	// Name identifies the feature; it is also the directory and repository name.
	Name string `json:"name" yaml:"name"`
	// Context is the auth context the commands need, e.g. "workspace".
	Context string `json:"context,omitempty" yaml:"context,omitempty"`
	// Description is the short help text of the feature's command group.
	Description string `json:"description" yaml:"description"`
	// Hooks names the scripts declared for install and uninstall.
	Hooks struct {
		Install   string `json:"install,omitempty" yaml:"install,omitempty"`
		Uninstall string `json:"uninstall,omitempty" yaml:"uninstall,omitempty"`
	} `yaml:"hooks"`
	// Entrypoint is the script, relative to the feature directory, that Run
	// executes with the virtual environment's Python.
	Entrypoint string `json:"entrypoint" yaml:"entrypoint"`
	// Commands lists the sub-commands the feature exposes and their flags.
	Commands []struct {
		Name        string `json:"name" yaml:"name"`
		Description string `json:"description" yaml:"description"`
		Flags       []struct {
			Name        string `json:"name" yaml:"name"`
			Description string `json:"description" yaml:"description"`
		} `json:"flags,omitempty" yaml:"flags,omitempty"`
	} `json:"commands,omitempty" yaml:"commands,omitempty"`

	// path is the absolute location of the feature directory.
	path string
	// checkout is the git repository handle for path; Install treats a
	// non-nil value as "already installed".
	checkout *git.Repository
}
// NewFeature builds the Feature that lives under ~/.databricks/labs/<name>.
//
// For a feature that is present on disk the metadata is decoded from its
// labs.yml. When the feature directory does not exist at all, a bare Feature
// carrying only the name and the target path is returned, so that callers can
// still invoke Install on it. A directory that exists but has no readable or
// valid labs.yml is an error.
//
// Errors from opening the git repository are returned, except for "does not
// exist", which is tolerated.
func NewFeature(name string) (*Feature, error) {
	home, err := os.UserHomeDir()
	if err != nil {
		return nil, err
	}
	path := filepath.Join(home, ".databricks", "labs", name)
	checkout, err := git.NewRepository(path)
	if err != nil && !os.IsNotExist(err) {
		return nil, err
	}
	feat := &Feature{
		Name:     name,
		path:     path,
		checkout: checkout,
	}
	raw, err := os.ReadFile(filepath.Join(path, "labs.yml"))
	if err != nil {
		if os.IsNotExist(err) {
			// No labs.yml: only acceptable when the feature directory itself
			// is absent, i.e. the feature has not been installed yet.
			if _, statErr := os.Stat(path); os.IsNotExist(statErr) {
				return feat, nil
			}
		}
		return nil, fmt.Errorf("read labs.yml: %w", err)
	}
	if err := yaml.Unmarshal(raw, feat); err != nil {
		return nil, fmt.Errorf("parse labs.yml: %w", err)
	}
	return feat, nil
}
// release holds the fields decoded from one entry of the GitHub releases
// listing fetched by LatestVersion.
type release struct {
	// TagName is the git tag of the release; Install passes it to git clone.
	TagName string `json:"tag_name"`
	// Draft is decoded from the response but not consulted by code in this file.
	Draft bool `json:"draft"`
	// Prerelease is decoded from the response but not consulted by code in this file.
	Prerelease bool `json:"prerelease"`
	// PublishedAt is the publication timestamp reported by the API.
	PublishedAt time.Time `json:"published_at"`
}
// LatestVersion fetches the release listing of the databrickslabs/<Name>
// repository from the GitHub API and returns its first entry.
//
// The listing order is whatever the API returns (presumably newest first —
// confirm against the GitHub documentation); the Draft and Prerelease fields
// are not taken into account. An error is returned when the request fails or
// when the repository has no releases at all.
func (i *Feature) LatestVersion(ctx context.Context) (*release, error) {
	var releases []release
	url := fmt.Sprintf("https://api.github.com/repos/databrickslabs/%s/releases", i.Name)
	if err := httpCall(ctx, url, &releases); err != nil {
		return nil, err
	}
	// An empty listing is a valid response; indexing it would panic.
	if len(releases) == 0 {
		return nil, fmt.Errorf("no releases found for %s", i.Name)
	}
	return &releases[0], nil
}
// CacheDir is the directory name, relative to a feature's directory, in which
// the feature's Python virtual environment is created and looked up.
const CacheDir = ".databricks"
// pythonInstallation describes one python3 executable discovered on PATH.
type pythonInstallation struct {
	// Version is the last space-separated word printed by `python3 --version`.
	Version string
	// Binary is the path of the python3 executable.
	Binary string
}
// pythonExecutables looks for a python3 binary in every PATH entry, asks each
// one for its version and returns the installations sorted by version in
// ascending order.
//
// PATH entries in which python3 cannot be stat'ed are skipped. A python3
// binary that fails to report its version aborts the discovery with an error,
// as does finding no interpreter at all.
func (i *Feature) pythonExecutables(ctx context.Context) ([]pythonInstallation, error) {
	found := []pythonInstallation{}
	paths := strings.Split(os.Getenv("PATH"), string(os.PathListSeparator))
	for _, dir := range paths {
		bin := filepath.Join(dir, "python3")
		if _, err := os.Stat(bin); err != nil {
			// Missing or inaccessible candidate: nothing to run here.
			continue
		}
		out, err := i.cmd(ctx, bin, "--version")
		if err != nil {
			return nil, err
		}
		// Output looks like "Python 3.10.4"; the version is the last word.
		words := strings.Split(out, " ")
		found = append(found, pythonInstallation{
			Version: words[len(words)-1],
			Binary:  bin,
		})
	}
	if len(found) == 0 {
		return nil, fmt.Errorf("no python3 executables found")
	}
	sort.Slice(found, func(x, y int) bool {
		left, right := found[x].Version, found[y].Version
		// The semver package only accepts versions with a leading "v";
		// without it every version is invalid and they all compare as equal.
		if cmp := semver.Compare("v"+left, "v"+right); cmp != 0 {
			return cmp < 0
		}
		// Tie or unparsable versions: fall back to a stable textual order.
		return left < right
	})
	return found, nil
}
// installVirtualEnv creates a Python virtual environment in the CacheDir
// folder of the feature directory and pip-installs the feature into it.
//
// It requires a setup.py in the feature directory and uses the first
// interpreter returned by pythonExecutables to create the environment.
func (i *Feature) installVirtualEnv(ctx context.Context) error {
	if _, err := os.Stat(filepath.Join(i.path, "setup.py")); err != nil {
		return err
	}
	interpreters, err := i.pythonExecutables(ctx)
	if err != nil {
		return err
	}
	interpreter := interpreters[0].Binary

	log.Debugf(ctx, "Creating python virtual environment in %s/%s", i.path, CacheDir)
	if _, err := i.cmd(ctx, interpreter, "-m", "venv", CacheDir); err != nil {
		return fmt.Errorf("create venv: %w", err)
	}

	log.Debugf(ctx, "Installing dependencies from setup.py")
	pip := filepath.Join(i.path, CacheDir, "bin", "pip")
	if _, err := i.cmd(ctx, pip, "install", "."); err != nil {
		return fmt.Errorf("pip install: %w", err)
	}
	return nil
}
// Run executes the feature's entrypoint with the Python interpreter of its
// virtual environment, passing raw (the JSON-encoded command input) as the
// first script argument.
//
// The child process reads directly from the CLI's stdin. Its stdout and stderr
// are funneled through a single pipe into the CLI's stdout, so the two streams
// are written in the order the child produced them. Run returns only after all
// of that output has been written; the command's error takes precedence over
// an error from copying its output.
func (i *Feature) Run(ctx context.Context, raw []byte) error {
	if err := i.installVirtualEnv(ctx); err != nil {
		return err
	}
	// TODO: detect virtual env (also create it on installation),
	// because here we just assume that virtual env is installed.
	python3 := filepath.Join(i.path, CacheDir, "bin", "python")

	// make sure to sync on writing to stdout
	reader, writer := io.Pipe()
	copied := make(chan error, 1)
	go func() {
		_, err := io.CopyBuffer(os.Stdout, reader, make([]byte, 128))
		// If writing to stdout failed, make pending and future writes of the
		// child's output fail as well instead of blocking forever.
		reader.CloseWithError(err)
		copied <- err
	}()

	// pass command parameters down to script as the first arg
	cmd := exec.Command(python3, i.Entrypoint, string(raw))
	cmd.Dir = i.path
	// Hand stdin over directly: no copying goroutine is left behind blocked
	// on os.Stdin once the command has finished.
	cmd.Stdin = os.Stdin
	cmd.Stdout = writer
	cmd.Stderr = writer

	runErr := cmd.Run()
	// Signal end of output and wait until the last chunk reached stdout.
	writer.Close()
	copyErr := <-copied
	if runErr != nil {
		return runErr
	}
	return copyErr
}
// cmd runs the given command line (args[0] is the program, the rest are its
// arguments) inside the feature directory and returns its combined stdout and
// stderr with surrounding whitespace trimmed.
//
// The command inherits the CLI's stdin. On failure the returned error wraps
// the underlying cause (for example a missing executable or a non-zero exit
// status) and includes whatever output the command produced.
func (i *Feature) cmd(ctx context.Context, args ...string) (string, error) {
	if len(args) == 0 {
		return "", fmt.Errorf("no command given")
	}
	commandStr := strings.Join(args, " ")
	log.Debugf(ctx, "running: %s", commandStr)

	var output bytes.Buffer
	cmd := exec.Command(args[0], args[1:]...)
	cmd.Dir = i.path
	cmd.Stdin = os.Stdin
	cmd.Stdout = &output
	cmd.Stderr = &output
	if err := cmd.Run(); err != nil {
		// Keep the cause: when the command cannot even start there is no
		// output, and the message would otherwise be empty.
		return "", fmt.Errorf("%s: %w: %s", commandStr, err, strings.TrimSpace(output.String()))
	}
	return strings.TrimSpace(output.String()), nil
}
// Install clones the feature's repository from the databrickslabs GitHub
// organization into the feature directory, checked out at the tag reported by
// LatestVersion.
//
// When the feature already has a git checkout, nothing is cloned and an
// "already installed" error naming the tag(s) at HEAD is returned instead.
func (i *Feature) Install(ctx context.Context) error {
	if i.checkout != nil {
		installedTag, err := i.cmd(ctx, "git", "tag", "--points-at", "HEAD")
		if err != nil {
			return err
		}
		return fmt.Errorf("%s (%s) is already installed", i.Name, installedTag)
	}

	repoURL := "https://github.com/databrickslabs/" + i.Name
	latest, err := i.LatestVersion(ctx)
	if err != nil {
		return err
	}

	log.Infof(ctx, "Installing %s (%s) into %s", repoURL, latest.TagName, i.path)
	return git.Clone(ctx, repoURL, latest.TagName, i.path)
}

View File

@ -0,0 +1,29 @@
package feature
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
)
// httpCall performs a GET request against url and decodes the JSON response
// body into response, which must be a pointer.
//
// The request is bound to ctx, so cancelling ctx aborts it; a nil ctx is
// treated as context.Background(). Responses with a status code of 400 or
// above are reported as an error carrying the status line. The response body
// is closed on every path.
func httpCall(ctx context.Context, url string, response any) error {
	if ctx == nil {
		ctx = context.Background()
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return err
	}
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	// Close before inspecting the status so error responses don't leak the body.
	defer res.Body.Close()
	if res.StatusCode >= 400 {
		return fmt.Errorf("github request failed: %s", res.Status)
	}
	raw, err := io.ReadAll(res.Body)
	if err != nil {
		return err
	}
	return json.Unmarshal(raw, response)
}

24
cmd/labs/install.go Normal file
View File

@ -0,0 +1,24 @@
package labs
import (
"github.com/databricks/cli/cmd/labs/feature"
"github.com/databricks/cli/cmd/root"
"github.com/spf13/cobra"
)
// newInstallCommand builds the `install NAME` sub-command, which resolves the
// named feature and installs it. A workspace client is required before the
// command runs.
func newInstallCommand() *cobra.Command {
	install := func(cmd *cobra.Command, args []string) error {
		feat, err := feature.NewFeature(args[0])
		if err != nil {
			return err
		}
		return feat.Install(cmd.Context())
	}
	return &cobra.Command{
		Use:     "install NAME",
		Short:   "Install a feature",
		Args:    cobra.ExactArgs(1),
		PreRunE: root.MustWorkspaceClient,
		RunE:    install,
	}
}

15
cmd/labs/install_test.go Normal file
View File

@ -0,0 +1,15 @@
package labs_test
import (
"context"
"testing"
"github.com/databricks/cli/cmd/internal"
"github.com/stretchr/testify/assert"
)
// TestInstallDbx runs `labs install dbx` through the root command and expects
// it to finish without an error.
func TestInstallDbx(t *testing.T) {
	_, err := internal.RunGetOutput(context.Background(), "labs", "install", "dbx")
	assert.NoError(t, err)
}

126
cmd/labs/labs.go Normal file
View File

@ -0,0 +1,126 @@
package labs
import (
"encoding/json"
"fmt"
"os"
"github.com/databricks/cli/cmd/labs/feature"
"github.com/databricks/cli/cmd/root"
"github.com/databricks/databricks-sdk-go/config"
"github.com/spf13/cobra"
)
// New builds the `labs` command group: the built-in list and install
// sub-commands, a placeholder `py` command, and one sub-command group per
// feature that is installed locally.
//
// Failing to load the installed features never aborts construction, because
// this function runs whenever the command tree is built. A missing labs
// directory is silently ignored; any other failure is reported as a warning
// on stderr and the built-in commands remain available.
func New() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "labs",
		Short: "Databricks Labs features",
		Long:  `Manage experimental Databricks Labs apps`,
	}
	// TODO: this should be on the top CLI level
	cmd.AddGroup(&cobra.Group{
		ID:    "labs",
		Title: "Databricks Labs",
	})
	cmd.AddCommand(
		newListCommand(),
		newInstallCommand(),
		&cobra.Command{
			Use:   "py",
			Short: "...",
			RunE: func(cmd *cobra.Command, args []string) error {
				return nil
			},
		},
	)
	// Do not panic here: a broken or absent local installation must not take
	// the rest of the CLI down with it.
	if err := infuse(cmd); err != nil && !os.IsNotExist(err) {
		fmt.Fprintf(os.Stderr, "Warning: cannot load installed Databricks Labs features: %s\n", err)
	}
	return cmd
}
// commandInput is the payload that is JSON-encoded and handed to a feature's
// entrypoint when one of its commands is invoked.
type commandInput struct {
	// Command is the name of the invoked feature command.
	Command string `json:"command"`
	// Flags maps flag names to their values; it also receives "log_level"
	// when a log-level flag is present.
	Flags map[string]any `json:"flags"`
	// OutputType is part of the payload but is not populated by infuse.
	OutputType string `json:"output_type"`
}
// infuse registers one command group per locally installed feature under cmd,
// with one sub-command for every command the feature declares.
//
// Each generated sub-command exposes the declared flags as string flags. When
// it runs, it (for features with the "workspace" context) resolves the
// workspace client and exports every non-empty config attribute through its
// environment variables, then serializes the command name and flag values to
// JSON and passes them to the feature's Run method.
//
// It returns an error when the installed features cannot be loaded.
func infuse(cmd *cobra.Command) error {
	ctx := cmd.Context()
	all, err := feature.LoadAll(ctx)
	if err != nil {
		return err
	}
	for _, f := range all {
		// Pin the loop variables for the closure below: Go versions before
		// 1.22 share one variable across iterations, which would make every
		// command run the last feature.
		f := f
		group := &cobra.Command{
			Use:     f.Name,
			Short:   f.Description,
			GroupID: "labs",
		}
		cmd.AddCommand(group)
		for _, v := range f.Commands {
			l := v
			definedFlags := v.Flags
			vcmd := &cobra.Command{
				Use:   v.Name,
				Short: v.Description,
				RunE: func(cmd *cobra.Command, args []string) error {
					flags := cmd.Flags()
					if f.Context == "workspace" {
						// TODO: context can be on both command and feature level
						if err := root.MustWorkspaceClient(cmd, args); err != nil {
							return err
						}
						// TODO: add account-level init as well
						w := root.WorkspaceClient(cmd.Context())
						// Export the resolved auth configuration so the child
						// process picks it up from its environment.
						for _, a := range config.ConfigAttributes {
							if a.IsZero(w.Config) {
								continue
							}
							for _, ev := range a.EnvVars {
								if err := os.Setenv(ev, a.GetString(w.Config)); err != nil {
									return fmt.Errorf("set %s: %w", a.Name, err)
								}
							}
						}
					}
					ci := &commandInput{
						Command: l.Name,
						Flags:   map[string]any{},
					}
					for _, flag := range definedFlags {
						value, err := flags.GetString(flag.Name)
						if err != nil {
							return fmt.Errorf("get %s flag: %w", flag.Name, err)
						}
						ci.Flags[flag.Name] = value
					}
					if logLevelFlag := flags.Lookup("log-level"); logLevelFlag != nil {
						ci.Flags["log_level"] = logLevelFlag.Value.String()
					}
					raw, err := json.Marshal(ci)
					if err != nil {
						return err
					}
					// actually execute the command
					return f.Run(cmd.Context(), raw)
				},
			}
			flags := vcmd.Flags()
			for _, flag := range definedFlags {
				flags.String(flag.Name, "", flag.Description)
			}
			group.AddCommand(vcmd)
		}
	}
	return nil
}

89
cmd/labs/list.go Normal file
View File

@ -0,0 +1,89 @@
package labs
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"github.com/databricks/cli/libs/cmdio"
"github.com/spf13/cobra"
)
// labsMeta is one row of the `labs list` output.
type labsMeta struct {
	// Name is the repository name.
	Name string `json:"name"`
	// Description is the repository description, shortened by the list command.
	Description string `json:"description"`
	// License is the name of the repository's license.
	License string `json:"license"`
}
// httpCall issues a GET request to url and unmarshals the JSON response body
// into response, which must be a pointer.
//
// The request honors ctx, so cancellation and deadlines abort it; a nil ctx
// falls back to context.Background(). A status code of 400 or above is turned
// into an error that carries the status line. The response body is always
// closed, including on error statuses.
func httpCall(ctx context.Context, url string, response any) error {
	if ctx == nil {
		ctx = context.Background()
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil)
	if err != nil {
		return err
	}
	res, err := http.DefaultClient.Do(req)
	if err != nil {
		return err
	}
	// Deferred before the status check so that no return path leaks the body.
	defer res.Body.Close()
	if res.StatusCode >= 400 {
		return fmt.Errorf("github request failed: %s", res.Status)
	}
	raw, err := io.ReadAll(res.Body)
	if err != nil {
		return err
	}
	return json.Unmarshal(raw, response)
}
// newListCommand builds the `list` sub-command, which fetches the public
// repositories of the databrickslabs GitHub account and renders the name and
// description of every repository that is neither archived nor a fork.
//
// Descriptions longer than 50 characters are cut to 50 characters followed by
// "...". The cut is made on character boundaries, so multi-byte UTF-8 text is
// never split in the middle of a character.
func newListCommand() *cobra.Command {
	return &cobra.Command{
		Use:   "list",
		Short: "List all labs",
		Annotations: map[string]string{
			"template": cmdio.Heredoc(`
Name Description
{{range .}}{{.Name}} {{.Description}}
{{end}}
`),
		},
		RunE: func(cmd *cobra.Command, args []string) error {
			ctx := cmd.Context()
			var repositories []struct {
				Name        string `json:"name"`
				Description string `json:"description"`
				Fork        bool   `json:"fork"`
				Archived    bool   `json:"archived"`
				License     struct {
					Name string `json:"name"`
				} `json:"license"`
			}
			err := httpCall(ctx,
				"https://api.github.com/users/databrickslabs/repos",
				&repositories)
			if err != nil {
				return err
			}
			info := []labsMeta{}
			for _, repo := range repositories {
				if repo.Archived || repo.Fork {
					continue
				}
				description := repo.Description
				// Truncate by runes, not bytes, to keep the text valid UTF-8.
				if runes := []rune(description); len(runes) > 50 {
					description = string(runes[:50]) + "..."
				}
				info = append(info, labsMeta{
					Name:        repo.Name,
					Description: description,
					License:     repo.License.Name,
				})
			}
			return cmdio.Render(ctx, info)
		},
	}
}