Added basic project configuration

This commit is contained in:
Serge Smertin 2022-05-14 19:55:00 +02:00
parent 4e8955085e
commit 1029244390
7 changed files with 289 additions and 48 deletions

6
project/README.md Normal file

@ -0,0 +1,6 @@
Project Configuration
---
_Good implicit defaults are better than explicit complex configuration._
Regardless of the current working directory, `bricks` finds the project root by looking for a `databricks.yml` file up the directory tree. Technically, there may be a couple of different Databricks Projects in the same Git repository, but the recommended setup is to have just one `databricks.yml` in the root of the Git repository.
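For example, a minimal `databricks.yml` could look like the following (values here are illustrative; the `dev_cluster` reference is optional and points to an existing cluster by name):

```yaml
name: dev
profile: demo
dev_cluster:
  cluster_name: Shared Autoscaling
```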

159
project/config.go Normal file

@ -0,0 +1,159 @@
package project
import (
"errors"
"fmt"
"io/ioutil"
"net/url"
"os"
"path"
"reflect"
"github.com/databrickslabs/terraform-provider-databricks/clusters"
"github.com/ghodss/yaml"
gitUrls "github.com/whilp/git-urls"
"gopkg.in/ini.v1"
)
type Isolation string
const (
None Isolation = ""
Soft Isolation = "soft"
)
// ConfigFile is the name of project configuration file
const ConfigFile = "databricks.yml"
type Assertions struct {
Groups []string `json:"groups,omitempty"`
Secrets []string `json:"secrets,omitempty"`
ServicePrincipals []string `json:"service_principals,omitempty"`
}
type Project struct {
Name string `json:"name"` // or do default from folder name?..
Profile string `json:"profile,omitempty"`
Isolation Isolation `json:"isolation,omitempty"`
// TODO: turn to pointer for the easy YAML marshalling
DevCluster *clusters.Cluster `json:"dev_cluster,omitempty"`
// Assertions defines a list of configurations expected to be applied
// to the workspace by a higher-privileged user (or service principal)
// in order for the deploy command to work, as individual project teams
// in almost all cases don't have admin privileges on Databricks
// workspaces.
//
// This configuration increases the flexibility of individual project
// teams and makes job deployments easier and portable across environments.
// This configuration block would contain the following entities to be
// created by administrator users or admin-level automation, like Terraform
// and/or SCIM provisioning.
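// As an illustration only (hypothetical values; the keys mirror the
// Assertions struct below), such a block in databricks.yml could look like:
//
//	assertions:
//	  groups:
//	    - data-engineers
//	  secrets:
//	    - project-secrets
//	  service_principals:
//	    - deploy-bot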
Assertions *Assertions `json:"assertions,omitempty"`
}
// IsDevClusterDefined returns true when a dev_cluster block is configured.
func (p Project) IsDevClusterDefined() bool {
return !reflect.ValueOf(p.DevCluster).IsZero()
}
// IsDevClusterJustReference denotes reference-only clusters.
// This conflicts with Soft isolation. Happens for cost-restricted projects,
// where there's only a single Shared Autoscaling cluster per workspace and
// general users have no ability to create other interactive clusters.
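// For illustration, a reference-only dev_cluster in databricks.yml carries
// nothing but the name of an existing cluster:
//
//	dev_cluster:
//	  cluster_name: Shared Autoscaling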
func (p *Project) IsDevClusterJustReference() bool {
if p.DevCluster == nil || p.DevCluster.ClusterName == "" {
return false
}
return reflect.DeepEqual(p.DevCluster, &clusters.Cluster{
ClusterName: p.DevCluster.ClusterName,
})
}
// IsDatabricksProject returns true for folders with `databricks.yml`
// in the parent tree
func IsDatabricksProject() bool {
_, err := findProjectRoot()
return err == nil
}
func loadProjectConf() (prj Project, err error) {
root, err := findProjectRoot()
if err != nil {
return
}
config, err := os.Open(fmt.Sprintf("%s/%s", root, ConfigFile))
if err != nil {
return
}
defer config.Close()
raw, err := ioutil.ReadAll(config)
if err != nil {
return
}
err = yaml.Unmarshal(raw, &prj)
if err != nil {
return
}
return validateAndApplyProjectDefaults(prj)
}
func validateAndApplyProjectDefaults(prj Project) (Project, error) {
// defaultCluster := clusters.Cluster{
// NodeTypeID: "smallest",
// SparkVersion: "latest",
// AutoterminationMinutes: 30,
// }
return prj, nil
}
func findProjectRoot() (string, error) {
return findDirWithLeaf(ConfigFile)
}
// getGitOrigin finds the original Git repository the project was cloned from,
// so that we can automatically verify whether this project is checked out in
// the user's Repos home folder, following recommended best practices. It can
// also be used to determine a good enough default project name.
func getGitOrigin() (*url.URL, error) {
root, err := findDirWithLeaf(".git")
if err != nil {
return nil, err
}
file := fmt.Sprintf("%s/.git/config", root)
gitConfig, err := ini.Load(file)
if err != nil {
return nil, err
}
// Section() would auto-create a missing section, so use GetSection to detect
// an absent `origin` remote explicitly.
section, err := gitConfig.GetSection(`remote "origin"`)
if err != nil {
return nil, fmt.Errorf("remote `origin` is not defined in %s", file)
}
origin := section.Key("url").Value()
if origin == "" {
return nil, fmt.Errorf("git origin url is not defined in %s", file)
}
return gitUrls.Parse(origin)
}
func findDirWithLeaf(leaf string) (string, error) {
dir, err := os.Getwd()
if err != nil {
return "", fmt.Errorf("cannot find $PWD: %s", err)
}
for {
_, err = os.Stat(fmt.Sprintf("%s/%s", dir, leaf))
if errors.Is(err, os.ErrNotExist) {
// TODO: test on windows
next := path.Dir(dir)
if dir == next { // or stop at $HOME?..
return "", fmt.Errorf("cannot find %s anywhere", leaf)
}
dir = next
continue
}
if err != nil {
return "", err
}
return dir, nil
}
}

47
project/config_test.go Normal file

@ -0,0 +1,47 @@
package project
import (
"fmt"
"os"
"path"
"testing"
"github.com/stretchr/testify/assert"
)
func TestFindProjectRoot(t *testing.T) {
wd, _ := os.Getwd()
defer os.Chdir(wd)
err := os.Chdir("internal/test/a/b/c")
assert.NoError(t, err)
root, err := findProjectRoot()
assert.NoError(t, err)
assert.Equal(t, fmt.Sprintf("%s/internal/test", wd), root)
}
func TestFindProjectRootInRoot(t *testing.T) {
wd, _ := os.Getwd()
defer os.Chdir(wd)
err := os.Chdir("/tmp")
assert.NoError(t, err)
_, err = findProjectRoot()
assert.EqualError(t, err, "cannot find databricks.yml anywhere")
}
func TestGetGitOrigin(t *testing.T) {
origin, err := getGitOrigin()
assert.NoError(t, err)
assert.Equal(t, "bricks.git", path.Base(origin.Path))
}
func TestLoadProjectConf(t *testing.T) {
wd, _ := os.Getwd()
defer os.Chdir(wd)
err := os.Chdir("internal/test/a/b/c")
assert.NoError(t, err)
prj, err := loadProjectConf()
assert.NoError(t, err)
assert.Equal(t, "dev", prj.Name)
assert.True(t, prj.IsDevClusterJustReference())
}

@ -1,32 +0,0 @@
package project
import (
"context"
"github.com/databrickslabs/terraform-provider-databricks/common"
"github.com/databrickslabs/terraform-provider-databricks/commands"
)
type appContext int
const (
// DatabricksClient holds DatabricksClient
DatabricksClient appContext = 1
)
func Authenticate(ctx context.Context) context.Context {
client := common.CommonEnvironmentClient()
client.WithCommandExecutor(func(ctx context.Context, _ *common.DatabricksClient) common.CommandExecutor {
return commands.NewCommandsAPI(ctx, client)
})
return context.WithValue(ctx, DatabricksClient, client)
}
func ClientFromContext(ctx context.Context) *common.DatabricksClient {
client, ok := ctx.Value(DatabricksClient).(*common.DatabricksClient)
if !ok {
panic("authentication is not configured")
}
return client
}

@ -0,0 +1,4 @@
name: dev
profile: demo
dev_cluster:
cluster_name: Shared Autoscaling

@ -3,36 +3,93 @@ package project
import (
"context"
"fmt"
"sync"
"github.com/databrickslabs/terraform-provider-databricks/clusters"
"github.com/databrickslabs/terraform-provider-databricks/commands"
"github.com/databrickslabs/terraform-provider-databricks/common"
"github.com/databrickslabs/terraform-provider-databricks/scim"
)
func CurrentUser(ctx context.Context) (scim.User, error) {
// TODO: memoize
return scim.NewUsersAPI(ctx, ClientFromContext(ctx)).Me()
// Current is the CLI application state
var Current inner
type inner struct {
mu sync.Mutex
once sync.Once
project *Project
client *common.DatabricksClient
me *scim.User
}
func ProjectName(ctx context.Context) string {
return "dev" // TODO: parse from config file
}
func DevelopmentCluster(ctx context.Context) (cluster clusters.ClusterInfo, err error) {
api := clusters.NewClustersAPI(ctx, ClientFromContext(ctx)) // TODO: rewrite with normal SDK
me, err := CurrentUser(ctx)
func (i *inner) init() {
i.mu.Lock()
defer i.mu.Unlock()
i.once.Do(func() {
client := common.CommonEnvironmentClient()
client.WithCommandExecutor(func(
ctx context.Context, c *common.DatabricksClient) common.CommandExecutor {
return commands.NewCommandsAPI(ctx, c)
})
i.client = client
prj, err := loadProjectConf()
if err != nil {
panic(err)
}
client.Profile = prj.Profile
i.project = &prj
})
}
func (i *inner) Client() *common.DatabricksClient {
i.init()
return i.client
}
func (i *inner) Project() *Project {
i.init()
return i.project
}
func (i *inner) Me() *scim.User {
i.mu.Lock()
defer i.mu.Unlock()
if i.me != nil {
return i.me
}
me, err := scim.NewUsersAPI(context.Background(), i.Client()).Me()
if err != nil {
panic(err)
}
i.me = &me
return &me
}
func (i *inner) DevelopmentCluster(ctx context.Context) (cluster clusters.ClusterInfo, err error) {
api := clusters.NewClustersAPI(ctx, i.Client()) // TODO: rewrite with normal SDK
if i.project.DevCluster == nil {
i.project.DevCluster = &clusters.Cluster{}
}
dc := i.project.DevCluster
if i.project.Isolation == Soft {
if i.project.IsDevClusterJustReference() {
err = fmt.Errorf("projects with soft isolation cannot have named clusters")
return
}
projectName := ProjectName(ctx)
devClusterName := fmt.Sprintf("dev/%s/%s", projectName, me.UserName)
return api.GetOrCreateRunningCluster(devClusterName)
me := i.Me()
dc.ClusterName = fmt.Sprintf("dev/%s/%s", i.project.Name, me.UserName)
}
if dc.ClusterName == "" {
err = fmt.Errorf("please either pick `isolation: soft` or specify a shared cluster name")
return
}
return api.GetOrCreateRunningCluster(dc.ClusterName, *dc)
}
func runCommandOnDev(ctx context.Context, language, command string) common.CommandResults {
client := ClientFromContext(ctx)
exec := client.CommandExecutor(ctx)
cluster, err := DevelopmentCluster(ctx)
cluster, err := Current.DevelopmentCluster(ctx)
exec := Current.Client().CommandExecutor(ctx)
if err != nil {
return common.CommandResults{
ResultType: "error",