From 102924439054fb396c8bd4b49ba4b4921518464d Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Sat, 14 May 2022 19:55:00 +0200 Subject: [PATCH] Added basic project configuration --- project/README.md | 6 + project/config.go | 159 ++++++++++++++++++++++++ project/config_test.go | 47 +++++++ project/context.go | 32 ----- project/internal/test/a/b/c/__init__.py | 0 project/internal/test/databricks.yml | 4 + project/project.go | 89 ++++++++++--- 7 files changed, 289 insertions(+), 48 deletions(-) create mode 100644 project/README.md create mode 100644 project/config.go create mode 100644 project/config_test.go delete mode 100644 project/context.go create mode 100644 project/internal/test/a/b/c/__init__.py create mode 100644 project/internal/test/databricks.yml diff --git a/project/README.md b/project/README.md new file mode 100644 index 00000000..1c39bbc7 --- /dev/null +++ b/project/README.md @@ -0,0 +1,6 @@ +Project Configuration +--- + +_Good implicit defaults are better than explicit complex configuration._ + +Regardless of the current working directory, `bricks` finds the project root by looking for a `databricks.yml` file up the directory tree. Technically, there might be a couple of different Databricks Projects in the same Git repository, but the recommended scenario is to have just one `databricks.yml` in the root of the Git repo. 
\ No newline at end of file diff --git a/project/config.go b/project/config.go new file mode 100644 index 00000000..f654f1a5 --- /dev/null +++ b/project/config.go @@ -0,0 +1,159 @@ +package project + +import ( + "errors" + "fmt" + "io/ioutil" + "net/url" + "os" + "path" + "reflect" + + "github.com/databrickslabs/terraform-provider-databricks/clusters" + "github.com/ghodss/yaml" + gitUrls "github.com/whilp/git-urls" + "gopkg.in/ini.v1" +) + +type Isolation string + +const ( + None Isolation = "" + Soft Isolation = "soft" +) + +// ConfigFile is the name of the project configuration file +const ConfigFile = "databricks.yml" + +type Assertions struct { + Groups []string `json:"groups,omitempty"` + Secrets []string `json:"secrets,omitempty"` + ServicePrincipals []string `json:"service_principals,omitempty"` +} + +type Project struct { + Name string `json:"name"` // or do default from folder name?.. + Profile string `json:"profile,omitempty"` + Isolation Isolation `json:"isolation,omitempty"` + + // TODO: turn to pointer for the easy YAML marshalling + DevCluster *clusters.Cluster `json:"dev_cluster,omitempty"` + + // Assertions defines a list of configurations expected to be applied + // to the workspace by a higher-privileged user (or service principal) + // in order for the deploy command to work, as individual project teams + // in almost all cases don’t have admin privileges on Databricks + // workspaces. + // + // This configuration simplifies the flexibility of individual project + // teams and makes job deployment easier and portable across environments. + // This configuration block would contain the following entities to be + // created by administrator users or admin-level automation, like Terraform + // and/or SCIM provisioning. + Assertions *Assertions `json:"assertions,omitempty"` +} + +func (p Project) IsDevClusterDefined() bool { + return reflect.ValueOf(p.DevCluster).IsZero() +} + +// IsDevClusterJustReference denotes reference-only clusters. 
+// This conflicts with Soft isolation. Happens for cost-restricted projects, +// where there's only a single Shared Autoscaling cluster per workspace and +// general users have no ability to create other interactive clusters. +func (p *Project) IsDevClusterJustReference() bool { + if p.DevCluster.ClusterName == "" { + return false + } + return reflect.DeepEqual(p.DevCluster, clusters.Cluster{ + ClusterName: p.DevCluster.ClusterName, + }) +} + +// IsDatabricksProject returns true for folders with `databricks.yml` +// in the parent tree +func IsDatabricksProject() bool { + _, err := findProjectRoot() + return err == nil +} + +func loadProjectConf() (prj Project, err error) { + root, err := findProjectRoot() + if err != nil { + return + } + config, err := os.Open(fmt.Sprintf("%s/%s", root, ConfigFile)) + if err != nil { + return + } + raw, err := ioutil.ReadAll(config) + if err != nil { + return + } + err = yaml.Unmarshal(raw, &prj) + if err != nil { + return + } + return validateAndApplyProjectDefaults(prj) +} + +func validateAndApplyProjectDefaults(prj Project) (Project, error) { + // defaultCluster := clusters.Cluster{ + // NodeTypeID: "smallest", + // SparkVersion: "latest", + // AutoterminationMinutes: 30, + // } + return prj, nil +} + +func findProjectRoot() (string, error) { + return findDirWithLeaf(ConfigFile) +} + +// finds the original git repository the project is cloned from, so that +// we could automatically verify if this project is checked out in repos +// home folder of the user according to recommended best practices. Can +// also be used to determine a good enough default project name. 
+func getGitOrigin() (*url.URL, error) { + root, err := findDirWithLeaf(".git") + if err != nil { + return nil, err + } + file := fmt.Sprintf("%s/.git/config", root) + gitConfig, err := ini.Load(file) + if err != nil { + return nil, err + } + section := gitConfig.Section(`remote "origin"`) + if section == nil { + return nil, fmt.Errorf("remote `origin` is not defined in %s", file) + } + url := section.Key("url") + if url == nil { + return nil, fmt.Errorf("git origin url is not defined") + } + return gitUrls.Parse(url.Value()) +} + +func findDirWithLeaf(leaf string) (string, error) { + dir, err := os.Getwd() + if err != nil { + return "", fmt.Errorf("cannot find $PWD: %s", err) + } + for { + _, err = os.Stat(fmt.Sprintf("%s/%s", dir, leaf)) + if errors.Is(err, os.ErrNotExist) { + // TODO: test on windows + next := path.Dir(dir) + if dir == next { // or stop at $HOME?.. + return "", fmt.Errorf("cannot find %s anywhere", leaf) + } + dir = next + continue + } + if err != nil { + return "", err + } + return dir, nil + } +} diff --git a/project/config_test.go b/project/config_test.go new file mode 100644 index 00000000..0b76db3b --- /dev/null +++ b/project/config_test.go @@ -0,0 +1,47 @@ +package project + +import ( + "fmt" + "os" + "path" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestFindProjectRoot(t *testing.T) { + wd, _ := os.Getwd() + defer os.Chdir(wd) + err := os.Chdir("internal/test/a/b/c") + assert.NoError(t, err) + root, err := findProjectRoot() + assert.NoError(t, err) + + assert.Equal(t, fmt.Sprintf("%s/internal/test", wd), root) +} + +func TestFindProjectRootInRoot(t *testing.T) { + wd, _ := os.Getwd() + defer os.Chdir(wd) + err := os.Chdir("/tmp") + assert.NoError(t, err) + _, err = findProjectRoot() + assert.EqualError(t, err, "cannot find databricks.yml anywhere") +} + +func TestGetGitOrigin(t *testing.T) { + origin, err := getGitOrigin() + assert.NoError(t, err) + assert.Equal(t, "bricks.git", path.Base(origin.Path)) +} + +func 
TestLoadProjectConf(t *testing.T) { + wd, _ := os.Getwd() + defer os.Chdir(wd) + os.Chdir("internal/test/a/b/c") + + prj, err := loadProjectConf() + assert.NoError(t, err) + assert.Equal(t, "dev", prj.Name) + assert.True(t, prj.IsDevClusterJustReference()) +} \ No newline at end of file diff --git a/project/context.go b/project/context.go deleted file mode 100644 index 7f179e26..00000000 --- a/project/context.go +++ /dev/null @@ -1,32 +0,0 @@ -package project - -import ( - "context" - - "github.com/databrickslabs/terraform-provider-databricks/common" - "github.com/databrickslabs/terraform-provider-databricks/commands" -) - - -type appContext int - -const ( - // DatabricksClient holds DatabricksClient - DatabricksClient appContext = 1 -) - -func Authenticate(ctx context.Context) context.Context { - client := common.CommonEnvironmentClient() - client.WithCommandExecutor(func(ctx context.Context, _ *common.DatabricksClient) common.CommandExecutor { - return commands.NewCommandsAPI(ctx, client) - }) - return context.WithValue(ctx, DatabricksClient, client) -} - -func ClientFromContext(ctx context.Context) *common.DatabricksClient { - client, ok := ctx.Value(DatabricksClient).(*common.DatabricksClient) - if !ok { - panic("authentication is not configured") - } - return client -} diff --git a/project/internal/test/a/b/c/__init__.py b/project/internal/test/a/b/c/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/project/internal/test/databricks.yml b/project/internal/test/databricks.yml new file mode 100644 index 00000000..3b8eb81f --- /dev/null +++ b/project/internal/test/databricks.yml @@ -0,0 +1,4 @@ +name: dev +profile: demo +dev_cluster: + cluster_name: Shared Autoscaling \ No newline at end of file diff --git a/project/project.go b/project/project.go index bdbd6ba3..67d7154e 100644 --- a/project/project.go +++ b/project/project.go @@ -3,40 +3,97 @@ package project import ( "context" "fmt" + "sync" 
"github.com/databrickslabs/terraform-provider-databricks/clusters" + "github.com/databrickslabs/terraform-provider-databricks/commands" "github.com/databrickslabs/terraform-provider-databricks/common" "github.com/databrickslabs/terraform-provider-databricks/scim" ) -func CurrentUser(ctx context.Context) (scim.User, error) { - // TODO: memoize - return scim.NewUsersAPI(ctx, ClientFromContext(ctx)).Me() +// Current CLI application state +var Current inner + +type inner struct { + mu sync.Mutex + once sync.Once + + project *Project + client *common.DatabricksClient + me *scim.User } -func ProjectName(ctx context.Context) string { - return "dev" // TODO: parse from config file +func (i *inner) init() { + i.mu.Lock() + defer i.mu.Unlock() + i.once.Do(func() { + client := common.CommonEnvironmentClient() + client.WithCommandExecutor(func( + ctx context.Context, c *common.DatabricksClient) common.CommandExecutor { + return commands.NewCommandsAPI(ctx, c) + }) + i.client = client + prj, err := loadProjectConf() + if err != nil { + panic(err) + } + client.Profile = prj.Profile + i.project = &prj + }) } -func DevelopmentCluster(ctx context.Context) (cluster clusters.ClusterInfo, err error) { - api := clusters.NewClustersAPI(ctx, ClientFromContext(ctx)) // TODO: rewrite with normal SDK - me, err := CurrentUser(ctx) +func (i *inner) Client() *common.DatabricksClient { + i.init() + return i.client +} + +func (i *inner) Project() *Project { + i.init() + return i.project +} + +func (i *inner) Me() *scim.User { + i.mu.Lock() + defer i.mu.Unlock() + if i.me != nil { + return i.me + } + me, err := scim.NewUsersAPI(context.Background(), i.Client()).Me() if err != nil { + panic(err) + } + i.me = &me + return &me +} + +func (i *inner) DevelopmentCluster(ctx context.Context) (cluster clusters.ClusterInfo, err error) { + api := clusters.NewClustersAPI(ctx, i.Client()) // TODO: rewrite with normal SDK + if i.project.DevCluster == nil { + i.project.DevCluster = &clusters.Cluster{} + } + dc 
:= i.project.DevCluster + if i.project.Isolation == Soft { + if i.project.IsDevClusterJustReference() { + err = fmt.Errorf("projects with soft isolation cannot have named clusters") + return + } + me := i.Me() + dc.ClusterName = fmt.Sprintf("dev/%s/%s", i.project.Name, me.UserName) + } + if dc.ClusterName == "" { + err = fmt.Errorf("please either pick `isolation: soft` or specify a shared cluster name") return } - projectName := ProjectName(ctx) - devClusterName := fmt.Sprintf("dev/%s/%s", projectName, me.UserName) - return api.GetOrCreateRunningCluster(devClusterName) + return api.GetOrCreateRunningCluster(dc.ClusterName, *dc) } func runCommandOnDev(ctx context.Context, language, command string) common.CommandResults { - client := ClientFromContext(ctx) - exec := client.CommandExecutor(ctx) - cluster, err := DevelopmentCluster(ctx) + cluster, err := Current.DevelopmentCluster(ctx) + exec := Current.Client().CommandExecutor(ctx) if err != nil { return common.CommandResults{ ResultType: "error", - Summary: err.Error(), + Summary: err.Error(), } } return exec.Execute(cluster.ClusterID, language, command) @@ -44,4 +101,4 @@ func runCommandOnDev(ctx context.Context, language, command string) common.Comma func RunPythonOnDev(ctx context.Context, command string) common.CommandResults { return runCommandOnDev(ctx, "python", command) -} \ No newline at end of file +}