`bricks deploy` MVP

This commit is contained in:
Serge Smertin 2022-10-31 12:09:52 +01:00
parent 38a9dabcbe
commit 8cab9d92f3
No known key found for this signature in database
GPG Key ID: 92A95A66446BCE3F
40 changed files with 1678 additions and 269 deletions

View File

@ -22,7 +22,7 @@ builds:
goarch: '386' goarch: '386'
- goos: linux - goos: linux
goarch: '386' goarch: '386'
binary: '{{ .ProjectName }}_v{{ .Version }}' binary: '{{ .ProjectName }}'
archives: archives:
- format: zip - format: zip
name_template: '{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}' name_template: '{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}'
@ -30,7 +30,7 @@ checksum:
name_template: '{{ .ProjectName }}_{{ .Version }}_SHA256SUMS' name_template: '{{ .ProjectName }}_{{ .Version }}_SHA256SUMS'
algorithm: sha256 algorithm: sha256
snapshot: snapshot:
name_template: '{{ incpatch .Version }}-devel' name_template: '{{ .ProjectName }}'
changelog: changelog:
sort: asc sort: asc
filters: filters:

43
cmd/build/build.go Normal file
View File

@ -0,0 +1,43 @@
package build
import (
"fmt"
"github.com/databricks/bricks/cmd/root"
"github.com/databricks/bricks/lib/ui"
"github.com/databricks/bricks/project"
"github.com/spf13/cobra"
)
var buildCmd = &cobra.Command{
Use: "deploy",
Short: "Build and deploy artifacts",
PreRunE: project.Configure,
RunE: func(cmd *cobra.Command, args []string) error {
ctx := cmd.Context()
prj := project.Get(ctx)
// https://github.com/databrickslabs/mosaic - both maven and python
// https://github.com/databrickslabs/arcuate - only python, no DBR needed, but has notebooks
all, err := prj.LocalArtifacts(ctx)
if err != nil {
return err
}
if len(all) == 0 {
return fmt.Errorf("nothing to deploy")
}
err = ui.SpinStages(ctx, []ui.Stage{
{InProgress: "Preparing", Callback: prj.Prepare, Complete: "Prepared!"},
{InProgress: "Building", Callback: prj.Build, Complete: "Built!"},
{InProgress: "Uploading", Callback: prj.Upload, Complete: "Uploaded!"},
{InProgress: "Installing", Callback: prj.Install, Complete: "Installed!"},
})
if err != nil {
return err
}
return nil
},
}
func init() {
root.RootCmd.AddCommand(buildCmd)
}

30
cmd/build/build_test.go Normal file
View File

@ -0,0 +1,30 @@
package build
import (
"context"
"os"
"testing"
"github.com/databricks/bricks/cmd/root"
"github.com/stretchr/testify/assert"
)
func Test(t *testing.T) {
t.Skip()
ctx := context.Background()
os.Setenv("BRICKS_ROOT", "/Users/serge.smertin/git/labs/transpiler")
root.RootCmd.SetArgs([]string{"deploy"})
err := root.RootCmd.ExecuteContext(ctx)
assert.NoError(t, err)
}
func TestArcuate(t *testing.T) {
t.Skip()
ctx := context.Background()
os.Setenv("BRICKS_ROOT", "/Users/serge.smertin/git/labs/arcuate")
root.RootCmd.SetArgs([]string{"deploy"})
err := root.RootCmd.ExecuteContext(ctx)
assert.NoError(t, err)
}

36
cmd/build/exp/main.go Normal file
View File

@ -0,0 +1,36 @@
package main
import (
"context"
"fmt"
"os"
"time"
"github.com/databricks/bricks/lib/ui"
)
func main() {
ctx := context.Background()
err := ui.SpinStages(ctx, []ui.Stage{
{InProgress: "Building", Callback: func(ctx context.Context, status func(string)) error {
time.Sleep(1 * time.Second)
status("first message")
time.Sleep(1 * time.Second)
status("second message")
time.Sleep(1 * time.Second)
return nil
}, Complete: "Built!"},
{InProgress: "Uploading", Callback: func(ctx context.Context, status func(string)) error {
status("third message")
time.Sleep(1 * time.Second)
return nil
}, Complete: "Uploaded!"},
{InProgress: "Installing", Callback: func(ctx context.Context, status func(string)) error {
time.Sleep(1 * time.Second)
return fmt.Errorf("nope")
}, Complete: "Installed!"},
})
if err != nil {
os.Exit(1)
}
}

4
doc/cmd-deploy.md Normal file
View File

@ -0,0 +1,4 @@
Deploying workloads
---
`bricks deploy` command is used to deploy a [project](project-lifecycle.md) to a target Databricks Workspace.

45
doc/configuration.md Normal file
View File

@ -0,0 +1,45 @@
Configuration
---
There are two main configuration concepts of `bricks` CLI: per-project `databricks.yml` and per-machine `~/.databrickscfg` file.
## `~/.databrickscfg`
The purpose of this file is hold connectivity profiles with possibly clear-text credentials to Databricks Workspaces or Databricks Accounts. Almost all entries from this configuration file can be set through environment variables. The same configuration file can be read via the official Databricks GoLang SDK and Databricks Python SDK. Legacy Databricks CLI supports reading only `host`, `token`, `username`, and `password` configuration options.
* `host` _(string)_: Databricks host for either workspace endpoint or Accounts endpoint. Environment: `DATABRICKS_HOST`.
* `account_id` _(string)_: Databricks Account ID for Accounts endpoint. Environment: `DATABRICKS_ACCOUNT_ID`.
* `token` _(string)_: Personal Access Token (PAT). Environment: `DATABRICKS_TOKEN`.
* `username` _(string)_: Username part of basic authentication. Environment: `DATABRICKS_USERNAME`.
* `password` _(string)_: Password part of basic authentication. Environment: `DATABRICKS_PASSWORD`.
* `profile` _(string)_: Connection profile specified within `~/.databrickscfg`. Environment: `DATABRICKS_CONFIG_PROFILE`.
* `config_file` _(string)_: Location of the Databricks CLI credentials file. By default, it is located in `~/.databrickscfg`. Environment: `DATABRICKS_CONFIG_FILE`.
* `google_service_account` _(string)_: Google Compute Platform (GCP) Service Account e-mail used for impersonation in the Default Application Credentials Flow that does not require a password. Environment: `DATABRICKS_GOOGLE_SERVICE_ACCOUNT`
* `google_credentials` _(string)_: GCP Service Account Credentials JSON or the location of these credentials on the local filesustem. Environment: `GOOGLE_CREDENTIALS`.
* `azure_workspace_resource_id` _(string)_: Azure Resource Manager ID for Azure Databricks workspace, which is exhanged for a Host. Environment: `DATABRICKS_AZURE_RESOURCE_ID`.
* `azure_use_msi` _(string)_: Instruct to use Azure Managed Service Identity passwordless authentication flow for Service Principals. Environment: `ARM_USE_MSI`.
* `azure_client_secret` _(string)_: Azure Active Directory Service Principal secret. Environment: `ARM_CLIENT_SECRET`.
* `azure_client_id` _(string)_: Azure Active Directory Service Principal Application ID. Environment: `ARM_CLIENT_ID`
* `azure_tenant_id` _(string)_: Azure Active Directory Tenant ID. Environment: `ARM_TENANT_ID`
* `azure_environment` _(string)_: Azure Environment (Public, UsGov, China, Germany) has specific set of API endpoints. Defaults to `PUBLIC`. Environment: `ARM_ENVIRONMENT`.
* `auth_type` _(string)_: When multiple auth attributes are available in the environment, use the auth type specified by this argument. This argument also holds currently selected auth.
* `http_timeout_seconds` _(int)_: Number of seconds for HTTP timeout.
* `debug_truncate_bytes` _(int)_: Truncate JSON fields in debug logs above this limit. Default is 96. Environment: `DATABRICKS_DEBUG_TRUNCATE_BYTES`
* `debug_headers` _(bool)_: Debug HTTP headers of requests made by the application. Default is false, as headers contain sensitive data, like tokens. Environment: `DATABRICKS_DEBUG_HEADERS`.
* `rate_limit` _(int)_: Maximum number of requests per second made to Databricks REST API. Environment: `DATABRICKS_RATE_LIMIT`
## `databricks.yml`
Frequently, developers work on more than a single project from their workstations. Having a per-project `databricks.yml` configuration file created by [`bricks init`](project-lifecycle.md#init) helps achieving resource isolation and connectivity credentials flexibility.
### Development Cluster
Every project, with the exception of several [project flavors](project-flavors.md) may have a Databricks Cluster, where groups or individual data engineers run Spark queries in the Databricks Runtime. It's also possible to [isolate](#isolation-levels) clusters.
### Project Flavor
[Project Flavors](project-flavors.md) are the features, that detect the intended behavior of deployments during the [project lifecycle](project-lifecycle.md).
### Isolation Levels
It's possible to achieve _soft isolation_ levels for multiple developers to independently work on the same project, like having different branches in Git.

13
doc/project-flavors.md Normal file
View File

@ -0,0 +1,13 @@
Project Flavors
---
`bricks` CLI detects variout project flavors dynamically every run, though sometimes you may be interested in overriding the defaults.
## Maven
If there's a `pom.xml` file in the same folder as [`databricks.yml`](configuration.md), `mvn clean package` is invoked during [`build`](project-lifecycle.md#build) stage, followed by uploading `target/$artifactId-$version.jar` file to DBFS during the [`upload`](project-lifecycle.md#upload) stage, installing it as a library on [Development Cluster](configuration.md#development-cluster) and waiting for the installation to succeed, reporting the error back otherwise.
## Python
If there's a `setup.py` file in the [project root](configuration.md), ...

28
doc/project-lifecycle.md Normal file
View File

@ -0,0 +1,28 @@
Project Lifecycle
---
Project lifecycle consists of different execution phases. This document aims at describing them as toroughly as possible.
## `init`
`bricks init` creates a [`databricks.yml`](configuration.md) file in the directory, where `bricks` CLI was invoked. It walks you through the interactive command prompts. The goal of this stage is to setup project flavor and connectivity to a Databricks workspapce.
## `prepare`
`bricks prepare` prepares the local filesystem for the following lifecycle stages, like rolling out the relevant Virtual Environment for [Python projects](project-flavors.md#python).
## `build`
`bricks build` triggers the relevant commands to package artifacts, like Java or Scala [JARs](project-flavors.md#maven) or Python [Wheels](project-flavors.md#python). It's also possible to have a multi-flavor project, like [Mosaic](https://github.com/databrickslabs/mosaic), where built Wheel depends on a built JAR.
## `upload`
`bricks upload` takes the artifacts created by [`bricks build`](#build) and uploads them to a path following configured [isolation level](configuration.md#isolation-levels).
## `deploy`
.. creates clusters
## `install`
`bricks install` takes remote paths created by [`bricks upload`](#upload) for artifacts created by [`bricks build`](#build) and installs them on [Development Cluster](configuration.md#development-cluster) following the configured [isolation level](configuration.md#isolation-levels).

@ -1 +1 @@
Subproject commit b719dadd27a5cb6c67db0b6ddef5458ec31cc8c0 Subproject commit 6cb641a6288ad0e4399bcf7cdf0156a12919bdb3

12
go.mod
View File

@ -13,27 +13,31 @@ require (
github.com/spf13/cobra v1.5.0 // Apache 2.0 github.com/spf13/cobra v1.5.0 // Apache 2.0
github.com/stretchr/testify v1.8.0 // MIT github.com/stretchr/testify v1.8.0 // MIT
github.com/whilp/git-urls v1.0.0 // MIT github.com/whilp/git-urls v1.0.0 // MIT
golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 // BSD-3-Clause golang.org/x/mod v0.6.0 // BSD-3-Clause
gopkg.in/ini.v1 v1.67.0 // Apache 2.0 gopkg.in/ini.v1 v1.67.0 // Apache 2.0
) )
require golang.org/x/sync v0.0.0-20220513210516-0976fa681c29
require ( require (
cloud.google.com/go/compute v1.6.1 // indirect cloud.google.com/go/compute v1.6.1 // indirect
github.com/briandowns/spinner v1.19.0
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect
github.com/davecgh/go-spew v1.1.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect
github.com/fatih/color v1.7.0
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
github.com/golang/protobuf v1.5.2 // indirect github.com/golang/protobuf v1.5.2 // indirect
github.com/google/go-querystring v1.1.0 // indirect github.com/google/go-querystring v1.1.0 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/kr/text v0.2.0 // indirect github.com/kr/text v0.2.0 // indirect
github.com/mattn/go-colorable v0.1.2 // indirect
github.com/mattn/go-isatty v0.0.8 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect github.com/spf13/pflag v1.0.5 // indirect
go.opencensus.io v0.23.0 // indirect go.opencensus.io v0.23.0 // indirect
golang.org/x/exp v0.0.0-20221028150844-83b7d23a625f
golang.org/x/net v0.0.0-20220526153639-5463443f8c37 // indirect golang.org/x/net v0.0.0-20220526153639-5463443f8c37 // indirect
golang.org/x/oauth2 v0.0.0-20220628200809-02e64fa58f26 // indirect golang.org/x/oauth2 v0.0.0-20220628200809-02e64fa58f26 // indirect
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a // indirect golang.org/x/sync v0.0.0-20220513210516-0976fa681c29
golang.org/x/sys v0.1.0 // indirect
golang.org/x/text v0.3.7 // indirect golang.org/x/text v0.3.7 // indirect
golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect
google.golang.org/api v0.82.0 // indirect google.golang.org/api v0.82.0 // indirect

18
go.sum
View File

@ -57,6 +57,8 @@ github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAE
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/briandowns/spinner v1.19.0 h1:s8aq38H+Qju89yhp89b4iIiMzMm8YN3p6vGpwyh/a8E=
github.com/briandowns/spinner v1.19.0/go.mod h1:mQak9GHqbspjC/5iUx3qMlIho8xBS/ppAL/hX5SmPJU=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
@ -91,6 +93,8 @@ github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.m
github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0=
github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys=
github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
@ -195,6 +199,10 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/manifoldco/promptui v0.9.0 h1:3V4HzJk1TtXW1MTZMP7mdlwbBpIinw3HztaIlYthEiA= github.com/manifoldco/promptui v0.9.0 h1:3V4HzJk1TtXW1MTZMP7mdlwbBpIinw3HztaIlYthEiA=
github.com/manifoldco/promptui v0.9.0/go.mod h1:ka04sppxSGFAtxX0qhlYQjISsg9mR4GWtQEhdbn6Pgg= github.com/manifoldco/promptui v0.9.0/go.mod h1:ka04sppxSGFAtxX0qhlYQjISsg9mR4GWtQEhdbn6Pgg=
github.com/mattn/go-colorable v0.1.2 h1:/bC9yWikZXAL9uJdulbSfyVNIR3n3trXl+v8+1sx8mU=
github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
github.com/mattn/go-isatty v0.0.8 h1:HLtExJ+uU2HOZ+wI0Tt5DtUDrx8yhUqDcp7fYERX4CE=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 h1:KoWmjvw+nsYOo29YJK9vDA65RGE3NrOnUtO7a+RF9HU= github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 h1:KoWmjvw+nsYOo29YJK9vDA65RGE3NrOnUtO7a+RF9HU=
@ -252,6 +260,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/exp v0.0.0-20221028150844-83b7d23a625f h1:Al51T6tzvuh3oiwX11vex3QgJ2XTedFPGmbEVh8cdoc=
golang.org/x/exp v0.0.0-20221028150844-83b7d23a625f/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
@ -277,8 +287,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 h1:kQgndtyPBW/JIYERgdxfwMYh3AVStj88WQTlNDi2a+o= golang.org/x/mod v0.6.0 h1:b9gGHsz9/HhJ3HF5DHQytPpuwocVTChQJK3AvoLRD5I=
golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= golang.org/x/mod v0.6.0/go.mod h1:4mET923SAdbXp2ki8ey+zGs1SLqsuM2Y0uvdZR/fUNI=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@ -359,6 +369,7 @@ golang.org/x/sync v0.0.0-20220513210516-0976fa681c29/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
@ -413,8 +424,9 @@ golang.org/x/sys v0.0.0-20220227234510-4e6760a101f9/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220328115105-d36c6a25d886/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220328115105-d36c6a25d886/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220502124256-b6088ccd6cba/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220502124256-b6088ccd6cba/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a h1:dGzPydgVsqGcTRVwiLJ1jVbufYwmzD3LfVPLKsKg+0k=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

88
lib/dbr/info.go Normal file
View File

@ -0,0 +1,88 @@
package dbr
import (
"context"
"encoding/json"
"fmt"
"github.com/databricks/databricks-sdk-go/retries"
"github.com/databricks/databricks-sdk-go/service/clusters"
"github.com/databricks/databricks-sdk-go/workspaces"
)
type RuntimeInfo struct {
Name string `json:"name"`
SparkVersion string `json:"spark_version"`
PythonVersion string `json:"python_version"`
PyPI []Package `json:"pypi"`
Jars []Package `json:"jars"`
}
type Package struct {
Group string `json:"group,omitempty"`
Name string `json:"name"`
Version string `json:"version"`
}
func (pkg *Package) PyPiName() string {
return fmt.Sprintf("%s==%s", pkg.Name, pkg.Version)
}
func GetRuntimeInfo(ctx context.Context, w *workspaces.WorkspacesClient,
clusterId string, status func(string)) (*RuntimeInfo, error) {
cluster, err := w.Clusters.GetByClusterId(ctx, clusterId)
if err != nil {
return nil, err
}
if !cluster.IsRunningOrResizing() {
_, err = w.Clusters.StartByClusterIdAndWait(ctx, clusterId,
func(i *retries.Info[clusters.ClusterInfo]) {
status(i.Info.StateMessage)
})
if err != nil {
return nil, err
}
}
command := w.CommandExecutor.Execute(ctx, clusterId, "python", infoScript)
if command.Failed() {
return nil, command.Err()
}
var info RuntimeInfo
err = json.Unmarshal([]byte(command.Text()), &info)
if err != nil {
return nil, err
}
return &info, nil
}
const infoScript = `import pkg_resources, json, sys, platform, subprocess
from pyspark.version import __version__
jars = []
for j in subprocess.check_output(['ls', '-1', '/databricks/jars']).decode().split("\n"):
if '--mvn--' in j or '--maven-trees--' in j:
split = j.split('--')[-1][:-4].split('__')
if not len(split) == 3:
continue
group, artifactId, version = split
jars.append({
'group': group,
'name': artifactId,
'version': version,
})
jars = sorted(jars, key=lambda jar: (jar['group'], jar['name']))
python_packages = [
{"name": n, "version": v}
for n, v in sorted([(i.key, i.version)
for i in pkg_resources.working_set])]
python_packages = sorted(python_packages, key=lambda x: x['name'])
runtime = spark.conf.get('spark.databricks.clusterUsageTags.sparkVersion')
print(json.dumps({
'name': runtime,
'spark_version': __version__[0:5],
'python_version': platform.python_version(),
'pypi': python_packages,
'jars': jars,
}))`

73
lib/fileset/file.go Normal file
View File

@ -0,0 +1,73 @@
package fileset
import (
"io"
"io/fs"
"log"
"os"
"path"
"regexp"
"strings"
"time"
)
type File struct {
fs.DirEntry
Absolute string
Relative string
}
func (f File) Modified() (ts time.Time) {
info, err := f.Info()
if err != nil {
// return default time, beginning of epoch
return ts
}
return info.ModTime()
}
func (fi File) Ext(suffix string) bool {
return strings.HasSuffix(fi.Name(), suffix)
}
func (fi File) Dir() string {
return path.Dir(fi.Absolute)
}
func (fi File) MustMatch(needle string) bool {
return fi.Match(regexp.MustCompile(needle))
}
func (fi File) FindAll(needle *regexp.Regexp) (all []string, err error) {
raw, err := fi.Raw()
if err != nil {
log.Printf("[ERROR] read %s: %s", fi.Absolute, err)
return nil, err
}
for _, v := range needle.FindAllStringSubmatch(string(raw), -1) {
all = append(all, v[1])
}
return all, nil
}
func (fi File) Match(needle *regexp.Regexp) bool {
raw, err := fi.Raw()
if err != nil {
log.Printf("[ERROR] read %s: %s", fi.Absolute, err)
return false
}
return needle.Match(raw)
}
func (fi File) Raw() ([]byte, error) {
f, err := fi.Open()
if err != nil {
return nil, err
}
return io.ReadAll(f)
}
func (fi File) Open() (*os.File, error) {
return os.Open(fi.Absolute)
}

98
lib/fileset/fileset.go Normal file
View File

@ -0,0 +1,98 @@
package fileset
import (
"fmt"
"os"
"path"
"path/filepath"
"regexp"
"strings"
)
type FileSet []File
func (fi FileSet) Root() string {
return strings.TrimSuffix(
strings.ReplaceAll(fi[0].Absolute, fi[0].Relative, ""),
"/")
}
func (fi FileSet) FirstMatch(pathRegex, needleRegex string) *File {
path := regexp.MustCompile(pathRegex)
needle := regexp.MustCompile(needleRegex)
for _, v := range fi {
if !path.MatchString(v.Absolute) {
continue
}
if v.Match(needle) {
return &v
}
}
return nil
}
func (fi FileSet) FindAll(pathRegex, needleRegex string) (map[File][]string, error) {
path := regexp.MustCompile(pathRegex)
needle := regexp.MustCompile(needleRegex)
all := map[File][]string{}
for _, v := range fi {
if !path.MatchString(v.Absolute) {
continue
}
vall, err := v.FindAll(needle)
if err != nil {
return nil, fmt.Errorf("%s: %w", v.Relative, err)
}
all[v] = vall
}
return all, nil
}
func (fi FileSet) Exists(pathRegex, needleRegex string) bool {
m := fi.FirstMatch(pathRegex, needleRegex)
return m != nil
}
func RecursiveChildren(dir, root string) (found FileSet, err error) {
// TODO: add options to skip, like current.Name() == "vendor"
queue, err := ReadDir(dir, root)
if err != nil {
return nil, err
}
for len(queue) > 0 {
current := queue[0]
queue = queue[1:]
if !current.IsDir() {
current.Relative = strings.ReplaceAll(current.Absolute, dir+"/", "")
found = append(found, current)
continue
}
children, err := ReadDir(current.Absolute, root)
if err != nil {
return nil, err
}
queue = append(queue, children...)
}
return found, nil
}
func ReadDir(dir, root string) (queue []File, err error) {
f, err := os.Open(dir)
if err != nil {
return
}
defer f.Close()
dirs, err := f.ReadDir(-1)
if err != nil {
return
}
for _, v := range dirs {
absolute := path.Join(dir, v.Name())
relative, err := filepath.Rel(root, absolute)
if err != nil {
return nil, err
}
queue = append(queue, File{v, absolute, relative})
}
return
}

98
lib/flavor/mvn/mvn.go Normal file
View File

@ -0,0 +1,98 @@
package mvn
import (
"context"
"encoding/xml"
"fmt"
"io"
"os"
"path/filepath"
"github.com/databricks/bricks/lib/flavor"
"github.com/databricks/bricks/lib/spawn"
"github.com/databricks/databricks-sdk-go/service/libraries"
)
type Pom struct {
Name string `xml:"name"`
GroupID string `xml:"groupId"`
ArtifactID string `xml:"artifactId"`
Version string `xml:"version"`
}
func (pom *Pom) Jar() string {
return fmt.Sprintf("%s-%s.jar", pom.ArtifactID, pom.Version)
}
type Maven struct {
SkipTests bool `json:"skip_tests,omitempty"`
}
func (mvn *Maven) RequiresCluster() bool {
return true
}
// Java libraries always require cluster restart
func (mvn *Maven) RequiresRestart() bool {
return true
}
func (mvn *Maven) Detected(prj flavor.Project) bool {
_, err := os.Stat(filepath.Join(prj.Root(), "pom.xml"))
return err == nil
}
func (mvn *Maven) LocalArtifacts(ctx context.Context, prj flavor.Project) (flavor.Artifacts, error) {
pom, err := mvn.Pom(prj.Root())
if err != nil {
return nil, err
}
return flavor.Artifacts{
{
Flavor: mvn,
Library: libraries.Library{
Jar: fmt.Sprintf("%s/target/%s", prj.Root(), pom.Jar()),
},
},
}, nil
}
func (mvn *Maven) Pom(root string) (*Pom, error) {
// TODO: perhaps we should call effective-pom, specially once
// we start comparing local spark version and the one on DBR
pomFile := fmt.Sprintf("%s/pom.xml", root)
pomHandle, err := os.Open(pomFile)
if err != nil {
return nil, fmt.Errorf("open %s: %w", pomFile, err)
}
pomBytes, err := io.ReadAll(pomHandle)
if err != nil {
return nil, fmt.Errorf("read %s: %w", pomFile, err)
}
var pom Pom
err = xml.Unmarshal(pomBytes, &pom)
if err != nil {
return nil, fmt.Errorf("parse %s: %w", pomFile, err)
}
return &pom, nil
}
func (mvn *Maven) Build(ctx context.Context, prj flavor.Project, status func(string)) error {
mavenPath, err := spawn.DetectExecutable(ctx, "mvn")
if err != nil {
return fmt.Errorf("no Maven installed: %w", err)
}
// report back the name of the JAR to the user
pom, _ := mvn.Pom(prj.Root())
status(fmt.Sprintf("Buidling %s", pom.Jar()))
args := []string{fmt.Sprintf("--file=%s/pom.xml", prj.Root())}
args = append(args, "-DskipTests=true")
args = append(args, "clean", "package")
_, err = spawn.ExecAndPassErr(ctx, mavenPath, args...)
if err != nil {
// TODO: figure out error reporting in a generic way
// one of the options is to re-run the same command with stdout forwarding
return fmt.Errorf("mvn package: %w", err)
}
return nil
}

View File

@ -0,0 +1,41 @@
package notebooks
import (
"context"
"fmt"
"os"
"path/filepath"
"github.com/databricks/bricks/lib/fileset"
"github.com/databricks/bricks/lib/flavor"
)
type Notebooks struct {
Folder string `json:"folder"`
}
func (n *Notebooks) Detected(prj flavor.Project) bool {
_, err := os.Stat(filepath.Join(prj.Root(), n.Folder))
return err == nil
}
func (n *Notebooks) LocalArtifacts(ctx context.Context, prj flavor.Project) (flavor.Artifacts, error) {
all := flavor.Artifacts{}
found, err := fileset.RecursiveChildren(filepath.Join(prj.Root(), n.Folder), prj.Root())
if err != nil {
return nil, fmt.Errorf("list notebooks: %w", err)
}
for _, f := range found {
if !f.MustMatch("# Databricks notebook source") {
continue
}
all = append(all, flavor.Artifact{
Notebook: &flavor.Notebook{
LocalAbsolute: f.Absolute,
RemoteRelative: f.Relative, // TODO: TBD behavior with regards to isolation
},
Flavor: n,
})
}
return all, nil
}

120
lib/flavor/pkg.go Normal file
View File

@ -0,0 +1,120 @@
package flavor
import (
"context"
"path/filepath"
"strings"
"github.com/databricks/databricks-sdk-go/service/libraries"
"github.com/databricks/databricks-sdk-go/service/workspace"
"github.com/databricks/databricks-sdk-go/workspaces"
)
type Project interface {
Root() string
WorkspacesClient() *workspaces.WorkspacesClient
GetDevelopmentClusterId(ctx context.Context) (clusterId string, err error)
}
type Flavor interface {
// Detected returns true on successful metadata checks
Detected(Project) bool
// LocalArtifacts show (cached) relevant files that _should_ exist
// on local filesystem
LocalArtifacts(context.Context, Project) (Artifacts, error)
}
type Notebook struct {
LocalAbsolute string
RemoteRelative string
}
type Artifact struct {
libraries.Library
Notebook *Notebook
Flavor Flavor
}
type notebookLanguageFormat struct {
Language workspace.ImportLanguage
Format workspace.ImportFormat
Overwrite bool
}
var extMap = map[string]notebookLanguageFormat{
".scala": {"SCALA", "SOURCE", true},
".py": {"PYTHON", "SOURCE", true},
".sql": {"SQL", "SOURCE", true},
".r": {"R", "SOURCE", true},
".dbc": {"", "DBC", false},
}
func (a Artifact) IsLibrary() bool {
return a.Library.String() != "unknown"
}
func (a Artifact) NotebookInfo() (*notebookLanguageFormat, bool) {
if a.Notebook == nil {
return nil, false
}
ext := strings.ToLower(filepath.Ext(a.Notebook.LocalAbsolute))
f, ok := extMap[ext]
return &f, ok
}
type Kind int
const (
LocalNotebook Kind = iota
LocalJar
LocalWheel
LocalEgg
RegistryLibrary
)
func (k Kind) RequiresBuild() bool {
switch k {
case LocalJar, LocalWheel, LocalEgg:
return true
default:
return false
}
}
func (a Artifact) KindAndLocation() (Kind, string) {
if a.Notebook != nil {
return LocalNotebook, a.Notebook.LocalAbsolute
}
if a.Jar != "" {
return LocalJar, a.Jar
}
if a.Whl != "" {
return LocalWheel, a.Whl
}
if a.Egg != "" {
return LocalEgg, a.Egg
}
return RegistryLibrary, ""
}
type Artifacts []Artifact
func (a Artifacts) RequiresBuild() bool {
for _, v := range a {
k, _ := v.KindAndLocation()
if k.RequiresBuild() {
return true
}
}
return false
}
func (a Artifacts) HasLibraries() bool {
for _, v := range a {
if v.IsLibrary() {
return true
}
}
return false
}

View File

@ -1,12 +1,14 @@
package python package py
import ( import (
"context" "context"
"encoding/json" "encoding/json"
"fmt" "fmt"
"log" "log"
"regexp"
"strings" "strings"
"github.com/databricks/bricks/python"
"golang.org/x/mod/semver" "golang.org/x/mod/semver"
) )
@ -14,7 +16,11 @@ type Dependency struct {
Name string Name string
Operator string Operator string
Version string Version string
Location string // @ file:///usr/loca Location string // @ file:///usr/local
}
func (d Dependency) NormalizedName() string {
return strings.ToLower(d.Name)
} }
func (d Dependency) CanonicalVersion() string { func (d Dependency) CanonicalVersion() string {
@ -24,33 +30,25 @@ func (d Dependency) CanonicalVersion() string {
type Environment []Dependency type Environment []Dependency
func (e Environment) Has(name string) bool { func (e Environment) Has(name string) bool {
dep := DependencyFromSpec(name)
for _, d := range e { for _, d := range e {
if d.Name == name { if d.NormalizedName() == dep.NormalizedName() {
return true return true
} }
} }
return false return false
} }
func Freeze(ctx context.Context) (Environment, error) {
out, err := Py(ctx, "-m", "pip", "freeze")
if err != nil {
return nil, err
}
env := Environment{}
deps := strings.Split(out, "\n")
for _, raw := range deps {
env = append(env, DependencyFromSpec(raw))
}
return env, nil
}
func DependencyFromSpec(raw string) (d Dependency) { func DependencyFromSpec(raw string) (d Dependency) {
raw = strings.ToLower(strings.TrimSpace(raw))
// TODO: write a normal parser for this // TODO: write a normal parser for this
// see https://peps.python.org/pep-0508/#grammar
rawSplit := strings.Split(raw, "==") rawSplit := strings.Split(raw, "==")
if len(rawSplit) != 2 { if len(rawSplit) != 2 {
log.Printf("[DEBUG] Skipping invalid dep: %s", raw) log.Printf("[DEBUG] Skipping invalid dep: %s", raw)
return return Dependency{
Name: raw,
}
} }
d.Name = rawSplit[0] d.Name = rawSplit[0]
d.Operator = "==" d.Operator = "=="
@ -65,6 +63,7 @@ type Distribution struct {
Version string `json:"version"` Version string `json:"version"`
Packages []string `json:"packages"` Packages []string `json:"packages"`
InstallRequires []string `json:"install_requires,omitempty"` InstallRequires []string `json:"install_requires,omitempty"`
TestsRequire []string `json:"tests_require,omitempty"`
} }
// InstallEnvironment returns only direct install dependencies // InstallEnvironment returns only direct install dependencies
@ -75,18 +74,30 @@ func (d Distribution) InstallEnvironment() (env Environment) {
return return
} }
// See: ttps://peps.python.org/pep-0503/#normalized-names
var pep503 = regexp.MustCompile(`[-_.]+`)
// NormalizedName returns PEP503-compatible Python Package Index project name. // NormalizedName returns PEP503-compatible Python Package Index project name.
// As per PEP 426 the only valid characters in a name are the ASCII alphabet, // As per PEP 426 the only valid characters in a name are the ASCII alphabet,
// ASCII numbers, ., -, and _. The name should be lowercased with all runs of // ASCII numbers, ., -, and _. The name should be lowercased with all runs of
// the characters ., -, or _ replaced with a single - character. // the characters ., -, or _ replaced with a single - character.
func (d Distribution) NormalizedName() string { func (d Distribution) NormalizedName() string {
// TODO: implement https://peps.python.org/pep-0503/#normalized-names return pep503.ReplaceAllString(d.Name, "-")
return d.Name }
// See: https://peps.python.org/pep-0491/#escaping-and-unicode
var pep491 = regexp.MustCompile(`[^\w\d.]+`)
func (d Distribution) WheelName() string {
// Each component of the filename is escaped by replacing runs
// of non-alphanumeric characters with an underscore _
distName := pep491.ReplaceAllString(d.NormalizedName(), "_")
return fmt.Sprintf("%s-%s-py3-none-any.whl", distName, d.Version)
} }
// ReadDistribution "parses" metadata from setup.py file. // ReadDistribution "parses" metadata from setup.py file.
func ReadDistribution(ctx context.Context) (d Distribution, err error) { func ReadDistribution(ctx context.Context) (d Distribution, err error) {
out, err := PyInline(ctx, ` out, err := python.PyInline(ctx, `
import setuptools, json, sys import setuptools, json, sys
setup_config = {} # actual args for setuptools.dist.Distribution setup_config = {} # actual args for setuptools.dist.Distribution
def capture(**kwargs): global setup_config; setup_config = kwargs def capture(**kwargs): global setup_config; setup_config = kwargs

18
lib/flavor/py/env_test.go Normal file
View File

@ -0,0 +1,18 @@
package py
import (
"context"
"testing"
"github.com/databricks/bricks/lib/spawn"
"github.com/stretchr/testify/assert"
)
func TestPyInlineX(t *testing.T) {
ctx := spawn.WithRoot(context.Background(), "testdata/simple-python-wheel")
dist, err := ReadDistribution(ctx)
assert.NoError(t, err)
assert.Equal(t, "dummy", dist.Name)
assert.Equal(t, "dummy", dist.Packages[0])
assert.True(t, dist.InstallEnvironment().Has("requests"))
}

307
lib/flavor/py/setup_py.go Normal file
View File

@ -0,0 +1,307 @@
package py
import (
"context"
"encoding/json"
"errors"
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"golang.org/x/exp/slices"
"github.com/databricks/bricks/lib/dbr"
"github.com/databricks/bricks/lib/flavor"
"github.com/databricks/bricks/lib/spawn"
"github.com/databricks/bricks/python"
"github.com/databricks/databricks-sdk-go/databricks/apierr"
"github.com/databricks/databricks-sdk-go/service/commands"
"github.com/databricks/databricks-sdk-go/service/libraries"
)
type SetupDotPy struct {
SetupPy string `json:"setup_py,omitempty"`
MirrorLibraries bool `json:"mirror_libraries,omitempty"`
venv string
wheelName string
}
func (s *SetupDotPy) RequiresCluster() bool {
return true
}
// Python libraries do not require a restart
func (s *SetupDotPy) RequiresRestart() bool {
return false
}
func (s *SetupDotPy) setupPyLoc(prj flavor.Project) string {
if s.SetupPy == "" {
s.SetupPy = "setup.py"
}
return filepath.Join(prj.Root(), s.SetupPy)
}
// We detect only setuptools build backend for now. Hatchling, PDM,
// and Flit _might_ be added in some distant future.
//
// See: https://packaging.python.org/en/latest/tutorials/packaging-projects/
func (s *SetupDotPy) Detected(prj flavor.Project) bool {
_, err := os.Stat(s.setupPyLoc(prj))
return err == nil
}
// readDistribution "parses" metadata from setup.py file from context
// of current project root and virtual env
func (s *SetupDotPy) readDistribution(ctx context.Context, prj flavor.Project) (d Distribution, err error) {
ctx = spawn.WithRoot(ctx, filepath.Dir(s.setupPyLoc(prj)))
out, err := python.Py(ctx, "-c", commands.TrimLeadingWhitespace(`
import setuptools, json, sys
setup_config = {} # actual args for setuptools.dist.Distribution
def capture(**kwargs): global setup_config; setup_config = kwargs
setuptools.setup = capture
import setup
json.dump(setup_config, sys.stdout)`))
if err != nil {
return
}
err = json.Unmarshal([]byte(out), &d)
return
}
func (s *SetupDotPy) Prepare(ctx context.Context, prj flavor.Project, status func(string)) error {
venv, err := s.fastDetectVirtualEnv(prj.Root())
if err != nil {
return err
}
s.venv = venv
if s.venv == "" {
// this allows CLI to be usable in existing projects with existing virtualenvs
venv = filepath.Join(prj.Root(), ".databricks") // TODO: integrate with pipenv
err := os.MkdirAll(venv, 0o700)
if err != nil {
return fmt.Errorf("mk venv: %w", err)
}
status(fmt.Sprintf("Creating virtualenv in %s", venv))
_, err = python.Py(ctx, "-m", "venv", venv)
if err != nil {
return fmt.Errorf("create venv: %w", err)
}
s.venv = venv
status("Upgrading pip")
_, err = s.Pip(ctx, "install", "--upgrade", "pip", "wheel")
if err != nil {
return fmt.Errorf("upgrade pip: %w", err)
}
}
env, err := s.Freeze(ctx)
if err != nil {
return fmt.Errorf("pip freeze: %s", err)
}
var remotePkgs []string
d, err := s.readDistribution(ctx, prj)
if err != nil {
return fmt.Errorf("setup.py: %s", err)
}
if s.MirrorLibraries {
// TODO: name `MirrorLibraries` is TBD
// TODO: must be part of init command survey
status("Fetching remote libraries")
remoteInfo, err := s.runtimeInfo(ctx, prj, status)
if err != nil && errors.As(err, &apierr.APIError{}) {
return err
}
// TODO: check Spark compatibility with locall install_requires
// TODO: check Python version compatibility with local virtualenv
if err != errNoCluster {
skipLibs := []string{
"dbus-python",
"distro-info",
"pip",
"psycopg2",
"pygobject",
"python-apt",
"requests-unixsocket",
"setuptools",
"unattended-upgrades",
"wheel",
}
PYPI:
for _, pkg := range remoteInfo.PyPI {
if env.Has(pkg.PyPiName()) {
continue
}
if pkg.Name == d.NormalizedName() {
// skip installing self
continue
}
for _, skip := range skipLibs {
if skip == pkg.Name {
continue PYPI
}
}
remotePkgs = append(remotePkgs, pkg.PyPiName())
}
}
}
type depList struct {
name string
packages []string
}
dbrDepsName := "remote cluster"
dependencyLists := []depList{
{dbrDepsName, remotePkgs},
{"install_requires", d.InstallRequires},
{"tests_require", d.TestsRequire},
}
for _, deps := range dependencyLists {
for _, dep := range deps.packages {
if env.Has(dep) {
continue
}
status(fmt.Sprintf("Installing %s in virtualenv (%s)", dep, deps.name))
_, err = s.Pip(ctx, "install", "--prefer-binary", dep)
if err != nil && deps.name == dbrDepsName &&
strings.Contains(err.Error(), "Could not find a version") {
continue
}
if err != nil {
return fmt.Errorf("%s: %w", dep, err)
}
// repeatedly run pip freeze so that we potentially have less installs
env, err = s.Freeze(ctx)
if err != nil {
return fmt.Errorf("pip freeze: %s", err)
}
}
}
return nil
}
var errNoCluster = errors.New("no development cluster")
func (s *SetupDotPy) runtimeInfo(ctx context.Context, prj flavor.Project,
status func(string)) (*dbr.RuntimeInfo, error) {
clusterId, err := prj.GetDevelopmentClusterId(ctx)
if err != nil && errors.As(err, &apierr.APIError{}) {
return nil, err
}
if err != nil {
return nil, errNoCluster
}
return dbr.GetRuntimeInfo(ctx, prj.WorkspacesClient(), clusterId, status)
}
func (s *SetupDotPy) Freeze(ctx context.Context) (Environment, error) {
out, err := s.Pip(ctx, "freeze")
if err != nil {
return nil, err
}
env := Environment{}
deps := strings.Split(out, "\n")
for _, raw := range deps {
env = append(env, DependencyFromSpec(raw))
}
return env, nil
}
func (s *SetupDotPy) LocalArtifacts(ctx context.Context, prj flavor.Project) (flavor.Artifacts, error) {
dist, err := s.readDistribution(ctx, prj)
if err != nil {
return nil, err
}
all := flavor.Artifacts{}
// install dependencies for the wheel to run
for _, dependency := range dist.InstallRequires {
if strings.HasPrefix(dependency, "pyspark") {
// pyspark will conflict with DBR
continue
}
all = append(all, flavor.Artifact{
Flavor: s,
Library: libraries.Library{
Pypi: &libraries.PythonPyPiLibrary{
Package: dependency,
},
},
})
}
s.wheelName = dist.WheelName()
all = append(all, flavor.Artifact{
Flavor: s,
Library: libraries.Library{
Whl: fmt.Sprintf("%s/.databricks/dist/%s", prj.Root(), s.wheelName),
},
})
return all, nil
}
// Build creates a python wheel, while keeping project root in a clean state, removing the need
// to execute rm -fr dist build *.egg-info after each build
func (s *SetupDotPy) Build(ctx context.Context, prj flavor.Project, status func(string)) error {
status(fmt.Sprintf("Building %s", s.wheelName))
ctx = spawn.WithRoot(ctx, filepath.Dir(s.setupPyLoc(prj)))
_, err := s.Py(ctx, "setup.py",
// see https://github.com/pypa/setuptools/blob/main/setuptools/_distutils/command/build.py#L23-L31
"build", "--build-lib=.databricks/build/lib", "--build-base=.databricks/build",
// see https://github.com/pypa/setuptools/blob/main/setuptools/command/egg_info.py#L167-L168
"egg_info", "--egg-base=.databricks",
// see https://github.com/pypa/wheel/blob/main/src/wheel/bdist_wheel.py#L140
"bdist_wheel", "--dist-dir=.databricks/dist")
return err
}
// Py calls project-specific Python interpreter from the virtual env from project root dir
func (s *SetupDotPy) Py(ctx context.Context, args ...string) (string, error) {
if s.venv == "" {
return "", fmt.Errorf("virtualenv not detected")
}
out, err := spawn.ExecAndPassErr(ctx, fmt.Sprintf("%s/bin/python3", s.venv), args...)
if err != nil {
// current error message chain is longer:
// failed to call {pyExec} __non_existing__.py: {pyExec}: can't open
// ... file '{pwd}/__non_existing__.py': [Errno 2] No such file or directory"
// probably we'll need to make it shorter:
// can't open file '$PWD/__non_existing__.py': [Errno 2] No such file or directory
return "", err
}
return strings.Trim(string(out), "\n\r"), nil
}
func (s *SetupDotPy) Pip(ctx context.Context, args ...string) (string, error) {
return s.Py(ctx, append([]string{"-m", "pip"}, args...)...)
}
// fastDetectVirtualEnv performs very quick detection, by running over top level directories only
func (s *SetupDotPy) fastDetectVirtualEnv(root string) (string, error) {
wdf, err := os.Open(root)
if err != nil {
return "", err
}
files, err := wdf.ReadDir(0)
if err != nil {
return "", err
}
// virtual env is most likely in dot-directory
slices.SortFunc(files, func(a, b fs.DirEntry) bool {
return a.Name() < b.Name()
})
for _, v := range files {
if !v.IsDir() {
continue
}
candidate := fmt.Sprintf("%s/%s", root, v.Name())
_, err = os.Stat(fmt.Sprintf("%s/pyvenv.cfg", candidate))
if errors.Is(err, os.ErrNotExist) {
continue
}
if err != nil {
return "", err
}
return candidate, nil
}
return "", nil
}

View File

@ -1,26 +1,23 @@
package python package py
import ( import (
"context" "context"
"fmt" "fmt"
"io"
"log" "log"
"os" "os"
"path" "path"
"strings" "strings"
"github.com/databricks/bricks/project" "github.com/databricks/bricks/python"
"github.com/databricks/bricks/utilities"
) )
func BuildWheel(ctx context.Context, dir string) (string, error) { func BuildWheel(ctx context.Context, dir string) (string, error) {
defer chdirAndBack(dir)()
// remove previous dist leak // remove previous dist leak
os.RemoveAll("dist") os.RemoveAll("dist")
// remove all other irrelevant traces // remove all other irrelevant traces
silentlyCleanupWheelFolder(".") silentlyCleanupWheelFolder(".")
// call simple wheel builder. we may need to pip install wheel as well // call simple wheel builder. we may need to pip install wheel as well
out, err := Py(ctx, "setup.py", "bdist_wheel") out, err := python.Py(ctx, "setup.py", "bdist_wheel")
if err != nil { if err != nil {
return "", err return "", err
} }
@ -50,7 +47,6 @@ func UploadWheelToDBFSWithPEP503(ctx context.Context, dir string) (string, error
if err != nil { if err != nil {
return "", err return "", err
} }
defer chdirAndBack(dir)()
dist, err := ReadDistribution(ctx) dist, err := ReadDistribution(ctx)
if err != nil { if err != nil {
return "", err return "", err
@ -61,28 +57,29 @@ func UploadWheelToDBFSWithPEP503(ctx context.Context, dir string) (string, error
// PEP503 indexes can be rolled out to clusters via checksummed global init script, that creates // PEP503 indexes can be rolled out to clusters via checksummed global init script, that creates
// a driver/worker `/etc/pip.conf` with FUSE-mounted file:///dbfs/FileStore/wheels/simple/.. // a driver/worker `/etc/pip.conf` with FUSE-mounted file:///dbfs/FileStore/wheels/simple/..
// extra index URLs. See more pointers at https://stackoverflow.com/q/30889494/277035 // extra index URLs. See more pointers at https://stackoverflow.com/q/30889494/277035
dbfsLoc := fmt.Sprintf("%s/%s/%s", DBFSWheelLocation, dist.NormalizedName(), path.Base(wheel)) _ = fmt.Sprintf("%s/%s/%s", DBFSWheelLocation, dist.NormalizedName(), path.Base(wheel))
return "", err
wsc := project.Get(ctx).WorkspacesClient() // wsc := project.Get(ctx).WorkspacesClient()
wf, err := os.Open(wheel) // wf, err := os.Open(wheel)
if err != nil { // if err != nil {
return "", err // return "", err
} // }
defer wf.Close() // defer wf.Close()
raw, err := io.ReadAll(wf) // raw, err := io.ReadAll(wf)
if err != nil { // if err != nil {
return "", err // return "", err
} // }
// err = dbfs.Create(dbfsLoc, raw, true) // // err = dbfs.Create(dbfsLoc, raw, true)
err = utilities.CreateDbfsFile(ctx, // err = utilities.CreateDbfsFile(ctx,
wsc, // wsc,
dbfsLoc, // dbfsLoc,
raw, // raw,
true, // true,
) // )
// TODO: maintain PEP503 compliance and update meta-files: // // TODO: maintain PEP503 compliance and update meta-files:
// ${DBFSWheelLocation}/index.html and ${DBFSWheelLocation}/${NormalizedName}/index.html // // ${DBFSWheelLocation}/index.html and ${DBFSWheelLocation}/${NormalizedName}/index.html
return dbfsLoc, err // return dbfsLoc, err
} }
func silentlyCleanupWheelFolder(dir string) { func silentlyCleanupWheelFolder(dir string) {
@ -116,11 +113,3 @@ func silentChildWithSuffix(dir, suffix string) string {
} }
return "" return ""
} }
func chdirAndBack(dir string) func() {
wd, _ := os.Getwd()
os.Chdir(dir)
return func() {
os.Chdir(wd)
}
}

View File

@ -1,4 +1,4 @@
package python package py
import ( import (
"context" "context"

View File

@ -0,0 +1,55 @@
package script
import (
"context"
"fmt"
"os"
"github.com/databricks/bricks/lib/flavor"
"github.com/databricks/bricks/lib/spawn"
"github.com/databricks/databricks-sdk-go/databricks"
)
type Script struct {
OnInit string `json:"init,omitempty"`
OnDeploy string `json:"deploy,omitempty"`
}
// Detected returns true if any if the scripts are configured
func (s *Script) Detected(p flavor.Project) bool {
return s.OnInit != "" || s.OnDeploy != ""
}
// TODO: move to a separate interface
func (s *Script) LocalArtifacts(ctx context.Context, p flavor.Project) (flavor.Artifacts, error) {
return nil, nil
}
func (s *Script) Build(ctx context.Context, p flavor.Project, status func(string)) error {
if s.OnDeploy == "" {
return nil
}
cfg := p.WorkspacesClient().Config
err := cfg.EnsureResolved()
if err != nil {
return fmt.Errorf("config: %w", err)
}
for _, a := range databricks.ConfigAttributes {
if len(a.EnvVars) != 1 {
continue
}
v := a.GetString(cfg)
if v == "" {
continue
}
// set environment variables of the current process to propagate
// the authentication credentials
os.Setenv(a.EnvVars[0], v)
}
out, err := spawn.ExecAndPassErr(ctx, "/bin/sh", "-c", s.OnDeploy)
if err != nil {
println(string(out))
return fmt.Errorf("failed: %s", s.OnDeploy)
}
return nil
}

81
lib/spawn/exec.go Normal file
View File

@ -0,0 +1,81 @@
package spawn
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"log"
"os/exec"
"runtime"
"strings"
)
type cxtKey int
const (
prjRoot cxtKey = iota
)
func WithRoot(ctx context.Context, root string) context.Context {
return context.WithValue(ctx, prjRoot, root)
}
func ExecAndPassErr(ctx context.Context, name string, args ...string) ([]byte, error) {
log.Printf("[DEBUG] Running %s %s", name, strings.Join(args, " "))
reader, writer := io.Pipe()
out := bytes.NewBuffer([]byte{}) // add option to route to Stdout in verbose mode
go io.Copy(out, reader)
cmd := exec.CommandContext(ctx, name, args...)
cmd.Stdout = writer
cmd.Stderr = writer
root, ok := ctx.Value(prjRoot).(string)
if ok {
cmd.Dir = root
}
err := cmd.Run()
_ = writer.Close()
_ = reader.Close()
if err != nil {
return nil, fmt.Errorf(trimmedS(out.Bytes()))
}
return []byte(trimmedS(out.Bytes())), nil
}
func DetectExecutable(ctx context.Context, exec string) (string, error) {
detector := "which"
if runtime.GOOS == "windows" {
detector = "where.exe"
}
out, err := ExecAndPassErr(ctx, detector, exec)
if err != nil {
return "", err
}
return trimmedS(out), nil
}
func nicerErr(err error) error {
if err == nil {
return nil
}
if ee, ok := err.(*exec.ExitError); ok {
errMsg := trimmedS(ee.Stderr)
if errMsg == "" {
errMsg = err.Error()
}
return errors.New(errMsg)
}
return err
}
func trimmedS(bytes []byte) string {
return strings.Trim(string(bytes), "\n\r")
}

14
lib/spawn/exec_test.go Normal file
View File

@ -0,0 +1,14 @@
package spawn
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
)
func TestExecAndPassError(t *testing.T) {
_, err := ExecAndPassErr(context.Background(), "which", "__non_existing__")
assert.EqualError(t, err, "exit status 1")
}

53
lib/ui/spinner.go Normal file
View File

@ -0,0 +1,53 @@
package ui
import (
"context"
"math/rand"
"time"
"github.com/briandowns/spinner"
"github.com/fatih/color"
)
// https://github.com/leaanthony/spinner alternative
func Spinner(label string, fn func(*spinner.Spinner) error, final string) error {
rand.Seed(time.Now().UnixMilli())
s := spinner.New(spinner.CharSets[rand.Intn(11)], 200*time.Millisecond)
_ = s.Color("green")
s.Start()
s.Prefix = " "
s.Suffix = " " + label
err := fn(s)
if err == nil {
s.FinalMSG = color.GreenString(" ✓ %s", final) // or ✓
} else {
s.FinalMSG = color.RedString(" ✗ %s", err) // or
}
s.Stop()
println("")
return err
}
type Stage struct {
InProgress string
Callback func(context.Context, func(string)) error
Complete string
}
func SpinStages(ctx context.Context, stages []Stage) error {
for _, v := range stages {
err := Spinner(v.InProgress, func(s *spinner.Spinner) error {
updateMsg := func(msg string) {
// TODO: print to stdout for non-tty
s.Suffix = " " + msg
}
return v.Callback(ctx, updateMsg)
}, v.Complete)
if err != nil {
return err
}
}
return nil
}

View File

@ -2,6 +2,7 @@ package main
import ( import (
_ "github.com/databricks/bricks/cmd/api" _ "github.com/databricks/bricks/cmd/api"
_ "github.com/databricks/bricks/cmd/build"
_ "github.com/databricks/bricks/cmd/configure" _ "github.com/databricks/bricks/cmd/configure"
_ "github.com/databricks/bricks/cmd/fs" _ "github.com/databricks/bricks/cmd/fs"
_ "github.com/databricks/bricks/cmd/init" _ "github.com/databricks/bricks/cmd/init"

View File

@ -9,6 +9,10 @@ import (
"reflect" "reflect"
"github.com/databricks/bricks/folders" "github.com/databricks/bricks/folders"
"github.com/databricks/bricks/lib/flavor/mvn"
"github.com/databricks/bricks/lib/flavor/notebooks"
"github.com/databricks/bricks/lib/flavor/py"
"github.com/databricks/bricks/lib/flavor/script"
"github.com/databricks/databricks-sdk-go/service/clusters" "github.com/databricks/databricks-sdk-go/service/clusters"
"github.com/ghodss/yaml" "github.com/ghodss/yaml"
@ -38,6 +42,11 @@ type Config struct {
// development-time vs deployment-time resources // development-time vs deployment-time resources
DevCluster *clusters.ClusterInfo `json:"dev_cluster,omitempty"` DevCluster *clusters.ClusterInfo `json:"dev_cluster,omitempty"`
Maven *mvn.Maven `json:"mvn,omitempty"`
SetupPy *py.SetupDotPy `json:"python,omitempty"`
Script *script.Script `json:"script,omitempty"`
Notebooks *notebooks.Notebooks `json:"notebooks,omitempty"`
// Assertions defines a list of configurations expected to be applied // Assertions defines a list of configurations expected to be applied
// to the workspace by a higher-privileged user (or service principal) // to the workspace by a higher-privileged user (or service principal)
// in order for the deploy command to work, as individual project teams // in order for the deploy command to work, as individual project teams

View File

@ -1,81 +0,0 @@
package project
// type Flavor interface {
// // Name returns a tuple of flavor key and readable name
// Name() (string, string)
// // Detected returns true on successful metadata checks
// Detected() bool
// // Build triggers packaging subprocesses
// Build(context.Context) error
// // TODO: Init() Questions
// // TODO: Deploy(context.Context) error
// }
// var _ Flavor = PythonWheel{}
// type PythonWheel struct{}
// func (pw PythonWheel) Name() (string, string) {
// return "wheel", "Python Wheel"
// }
// func (pw PythonWheel) Detected() bool {
// root, err := findProjectRoot()
// if err != nil {
// return false
// }
// _, err = os.Stat(fmt.Sprintf("%s/setup.py", root))
// return err == nil
// }
// func (pw PythonWheel) Build(ctx context.Context) error {
// defer toTheRootAndBack()()
// // do subprocesses or https://github.com/go-python/cpy3
// // it all depends on complexity and binary size
// // TODO: detect if there's an .venv here and call setup.py with ENV vars of it
// // TODO: where.exe python (WIN) / which python (UNIX)
// cmd := exec.CommandContext(ctx, "python", "setup.py", "bdist-wheel")
// err := cmd.Run()
// if err != nil {
// return err
// }
// return nil
// }
// func toTheRootAndBack() func() {
// wd, _ := os.Getwd()
// root, _ := findProjectRoot()
// os.Chdir(root)
// return func() {
// os.Chdir(wd)
// }
// }
// var _ Flavor = PythonNotebooks{}
// type PythonNotebooks struct{}
// func (n PythonNotebooks) Name() (string, string) {
// // or just "notebooks", as we might shuffle in scala?...
// return "python-notebooks", "Python Notebooks"
// }
// func (n PythonNotebooks) Detected() bool {
// // TODO: Steps:
// // - get all filenames
// // - read first X bytes from random 10 files and check
// // if they're "Databricks Notebook Source"
// return false
// }
// func (n PythonNotebooks) Build(ctx context.Context) error {
// // TODO: perhaps some linting?..
// return nil
// }
// func (n PythonNotebooks) Deploy(ctx context.Context) error {
// // TODO: recursively upload notebooks to a given workspace path
// return nil
// }

View File

@ -3,8 +3,15 @@ package project
import ( import (
"context" "context"
"fmt" "fmt"
"os"
"sync" "sync"
"github.com/databricks/bricks/lib/flavor"
"github.com/databricks/bricks/lib/flavor/mvn"
"github.com/databricks/bricks/lib/flavor/notebooks"
"github.com/databricks/bricks/lib/flavor/py"
"github.com/databricks/bricks/lib/flavor/script"
"github.com/databricks/bricks/lib/spawn"
"github.com/databricks/databricks-sdk-go/databricks" "github.com/databricks/databricks-sdk-go/databricks"
"github.com/databricks/databricks-sdk-go/service/clusters" "github.com/databricks/databricks-sdk-go/service/clusters"
"github.com/databricks/databricks-sdk-go/service/commands" "github.com/databricks/databricks-sdk-go/service/commands"
@ -23,6 +30,9 @@ type project struct {
environment *Environment environment *Environment
wsc *workspaces.WorkspacesClient wsc *workspaces.WorkspacesClient
me *scim.User me *scim.User
artifacts flavor.Artifacts
flavors []flavor.Flavor
} }
// Configure is used as a PreRunE function for all commands that // Configure is used as a PreRunE function for all commands that
@ -60,16 +70,37 @@ func Initialize(ctx context.Context, root, env string) (context.Context, error)
if !ok { if !ok {
return nil, fmt.Errorf("environment [%s] not defined", env) return nil, fmt.Errorf("environment [%s] not defined", env)
} }
// TODO: maybe we do Flavor#Detect here
if config.Maven == nil {
config.Maven = &mvn.Maven{}
}
if config.SetupPy == nil {
config.SetupPy = &py.SetupDotPy{}
}
if config.Script == nil {
config.Script = &script.Script{}
}
if config.Notebooks == nil {
config.Notebooks = &notebooks.Notebooks{}
}
p := project{ p := project{
root: root, root: root,
env: env, env: env,
config: &config, config: &config,
environment: &environment, environment: &environment,
flavors: []flavor.Flavor{
config.Maven,
config.SetupPy,
config.Script,
config.Notebooks,
},
} }
p.initializeWorkspacesClient(ctx) p.initializeWorkspacesClient(ctx)
// make all spawn.ExecAndPassErr executed in project root
// TODO: pass verbosity flag from somewhere
ctx = spawn.WithRoot(ctx, root)
return context.WithValue(ctx, &projectKey, &p), nil return context.WithValue(ctx, &projectKey, &p), nil
} }
@ -131,6 +162,7 @@ func (p *project) DeploymentIsolationPrefix() string {
if p.config.Isolation == None { if p.config.Isolation == None {
return p.config.Name return p.config.Name
} }
// TODO: git branch
if p.config.Isolation == Soft { if p.config.Isolation == Soft {
me, err := p.Me() me, err := p.Me()
if err != nil { if err != nil {
@ -200,11 +232,16 @@ func RunPythonOnDev(ctx context.Context, command string) common.CommandResults {
} }
*/ */
const bricksClusterId = "BRICKS_CLUSTER_ID"
// TODO: Add safe access to p.project and p.project.DevCluster that throws errors if // TODO: Add safe access to p.project and p.project.DevCluster that throws errors if
// the fields are not defined properly // the fields are not defined properly
func (p *project) GetDevelopmentClusterId(ctx context.Context) (clusterId string, err error) { func (p *project) GetDevelopmentClusterId(ctx context.Context) (clusterId string, err error) {
clusterId = p.config.DevCluster.ClusterId clusterId = p.config.DevCluster.ClusterId
clusterName := p.config.DevCluster.ClusterName clusterName := p.config.DevCluster.ClusterName
if clusterId == "" {
clusterId = os.Getenv(bricksClusterId)
}
if clusterId != "" { if clusterId != "" {
return return
} else if clusterName != "" { } else if clusterName != "" {

146
project/project_build.go Normal file
View File

@ -0,0 +1,146 @@
package project
import (
"context"
"encoding/base64"
"fmt"
"io"
"os"
"path/filepath"
"github.com/databricks/bricks/lib/flavor"
"github.com/databricks/databricks-sdk-go/databricks/apierr"
"github.com/databricks/databricks-sdk-go/service/workspace"
)
var b64 = base64.StdEncoding
func (p *project) LocalArtifacts(ctx context.Context) (flavor.Artifacts, error) {
for _, f := range p.flavors {
if !f.Detected(p) {
continue
}
arts, err := f.LocalArtifacts(ctx, p)
if err != nil {
return nil, err
}
p.artifacts = append(p.artifacts, arts...)
}
return p.artifacts, nil
}
type preparable interface {
Prepare(ctx context.Context, prj flavor.Project, status func(string)) error
}
func (p *project) Prepare(ctx context.Context, status func(string)) error {
for _, f := range p.flavors {
if !f.Detected(p) {
continue
}
prep, ok := f.(preparable)
if !ok {
continue
}
err := prep.Prepare(ctx, p, status)
if err != nil {
return fmt.Errorf("prepare: %w", err)
}
}
return nil
}
type buildable interface {
Build(ctx context.Context, prj flavor.Project, status func(string)) error
}
func (p *project) Build(ctx context.Context, status func(string)) error {
for _, f := range p.flavors {
if !f.Detected(p) {
continue
}
b, ok := f.(buildable)
if !ok {
continue
}
// TODO: compare last modified artifact vs last modified remote
// TODO: compare last modified artifact vs last modified fileset
err := b.Build(ctx, p, status)
if err != nil {
return fmt.Errorf("build: %w", err)
}
}
return nil
}
func (p *project) Upload(ctx context.Context, status func(string)) error {
for _, a := range p.artifacts {
err := p.upload(ctx, a, status)
if err != nil {
return fmt.Errorf("upload: %w", err)
}
}
return nil
}
func (p *project) upload(ctx context.Context, a flavor.Artifact, status func(string)) error {
kind, local, remote := p.remotePath(a)
if remote == "" {
return nil
}
localBasename := filepath.Base(local)
remoteDir := filepath.Dir(remote)
status(fmt.Sprintf("Uploading %s to %s", localBasename, remoteDir))
file, err := os.Open(local)
if err != nil {
return fmt.Errorf("open: %w", err)
}
defer file.Close()
if kind != flavor.LocalNotebook {
return p.wsc.Dbfs.Overwrite(ctx, remote, file)
}
format, ok := a.NotebookInfo()
if !ok {
return fmt.Errorf("unknown notebook: %s", a)
}
raw, err := io.ReadAll(file)
if err != nil {
return fmt.Errorf("read: %w", err)
}
_, err = p.wsc.Workspace.GetStatusByPath(ctx, remoteDir)
if apierr.IsMissing(err) {
err = p.wsc.Workspace.MkdirsByPath(ctx, remoteDir)
if err != nil {
return err
}
}
return p.wsc.Workspace.Import(ctx, workspace.Import{
Path: remote,
Overwrite: format.Overwrite,
Format: format.Format,
Language: format.Language,
Content: b64.EncodeToString(raw),
})
}
func (p *project) remotePath(a flavor.Artifact) (flavor.Kind, string, string) {
libsFmt := fmt.Sprintf("dbfs:/FileStore/%%s/%s/%%s", p.DeploymentIsolationPrefix())
kind, loc := a.KindAndLocation()
switch kind {
case flavor.LocalJar:
return kind, loc, fmt.Sprintf(libsFmt, "jars", filepath.Base(loc))
case flavor.LocalWheel:
return kind, loc, fmt.Sprintf(libsFmt, "wheels", filepath.Base(loc))
case flavor.LocalEgg:
return kind, loc, fmt.Sprintf(libsFmt, "eggs", filepath.Base(loc))
case flavor.LocalNotebook:
me, err := p.Me()
if err != nil {
panic(err)
}
return kind, loc, fmt.Sprintf("/Users/%s/%s/%s",
me.UserName, p.config.Name, a.Notebook.RemoteRelative)
default:
return kind, loc, ""
}
}

134
project/project_install.go Normal file
View File

@ -0,0 +1,134 @@
package project
import (
"context"
"fmt"
"path/filepath"
"sort"
"strings"
"github.com/databricks/bricks/lib/flavor"
"github.com/databricks/databricks-sdk-go/retries"
"github.com/databricks/databricks-sdk-go/service/clusters"
"github.com/databricks/databricks-sdk-go/service/libraries"
)
type runsOnCluster interface {
RequiresCluster() bool
}
type restartable interface {
RequiresRestart() bool
}
func (p *project) RequiresCluster() bool {
for _, f := range p.flavors {
if !f.Detected(p) {
continue
}
r, ok := f.(runsOnCluster)
if !ok {
continue
}
if r.RequiresCluster() {
return true
}
}
return false
}
func (p *project) RequiresRestart() bool {
for _, f := range p.flavors {
if !f.Detected(p) {
continue
}
r, ok := f.(restartable)
if !ok {
continue
}
if r.RequiresRestart() {
return true
}
}
return false
}
func (p *project) Install(ctx context.Context, status func(string)) error {
if !p.RequiresCluster() {
// nothing to do
return nil
}
clusterId, err := p.GetDevelopmentClusterId(ctx)
if err != nil {
// cluster not found is also fine, abort execution
return nil
}
info, err := p.wsc.Clusters.GetByClusterId(ctx, clusterId)
if err != nil {
// TODO: special behavior for (auto)deleted clusters
// re-create, if possible?
return err
}
if p.RequiresRestart() && info.IsRunningOrResizing() {
_, err = p.wsc.Clusters.RestartAndWait(ctx, clusters.RestartCluster{
ClusterId: clusterId,
}, func(i *retries.Info[clusters.ClusterInfo]) {
status(i.Info.StateMessage)
})
} else if !info.IsRunningOrResizing() {
_, err = p.wsc.Clusters.StartByClusterIdAndWait(ctx, clusterId,
func(i *retries.Info[clusters.ClusterInfo]) {
status(i.Info.StateMessage)
})
}
if err != nil {
return err
}
if !p.artifacts.HasLibraries() {
return nil
}
var libs []libraries.Library
for _, a := range p.artifacts {
k, _, remote := p.remotePath(a)
switch k {
case flavor.LocalJar:
libs = append(libs, libraries.Library{Jar: remote})
case flavor.LocalWheel:
libs = append(libs, libraries.Library{Whl: remote})
case flavor.LocalEgg:
libs = append(libs, libraries.Library{Egg: remote})
case flavor.RegistryLibrary:
libs = append(libs, a.Library)
default:
continue
}
}
// TODO: uninstall previous versions of libraries
return p.wsc.Libraries.UpdateAndWait(ctx, libraries.Update{
ClusterId: clusterId,
Install: libs,
}, func(i *retries.Info[libraries.ClusterLibraryStatuses]) {
byStatus := map[string][]string{}
for _, lib := range i.Info.LibraryStatuses {
if lib.IsLibraryForAllClusters {
continue
}
if lib.Status == libraries.LibraryFullStatusStatusInstalled ||
lib.Status == libraries.LibraryFullStatusStatusUninstallOnRestart {
continue
}
name := lib.Library.String()
if strings.HasPrefix(name, "jar:dbfs:") || strings.HasPrefix(name, "whl:dbfs:") {
name = filepath.Base(name)
}
byStatus[string(lib.Status)] = append(byStatus[string(lib.Status)], name)
}
msg := []string{}
for k, v := range byStatus {
sort.Strings(v)
msg = append(msg, fmt.Sprintf("%s (%s)", k, strings.Join(v, ", ")))
}
sort.Strings(msg)
status(strings.Join(msg, ", "))
})
}

View File

@ -1,24 +0,0 @@
package python
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
)
func TestFreeze(t *testing.T) {
env, err := Freeze(context.Background())
assert.NoError(t, err)
assert.Greater(t, len(env), 1)
assert.True(t, env.Has("urllib3"))
}
func TestPyInlineX(t *testing.T) {
defer chdirAndBack("testdata/simple-python-wheel")()
dist, err := ReadDistribution(context.Background())
assert.NoError(t, err)
assert.Equal(t, "dummy", dist.Name)
assert.Equal(t, "dummy", dist.Packages[0])
assert.True(t, dist.InstallEnvironment().Has("requests"))
}

View File

@ -5,21 +5,23 @@ import (
"errors" "errors"
"fmt" "fmt"
"os" "os"
"os/exec"
"runtime"
"strings" "strings"
"github.com/databricks/bricks/lib/spawn"
"github.com/databricks/databricks-sdk-go/service/commands"
) )
func PyInline(ctx context.Context, inlinePy string) (string, error) { func PyInline(ctx context.Context, inlinePy string) (string, error) {
return Py(ctx, "-c", TrimLeadingWhitespace(inlinePy)) return Py(ctx, "-c", commands.TrimLeadingWhitespace(inlinePy))
} }
func Py(ctx context.Context, script string, args ...string) (string, error) { // Py calls system-detected Python3 executable
func Py(ctx context.Context, args ...string) (string, error) {
py, err := detectExecutable(ctx) py, err := detectExecutable(ctx)
if err != nil { if err != nil {
return "", err return "", err
} }
out, err := execAndPassErr(ctx, py, append([]string{script}, args...)...) out, err := spawn.ExecAndPassErr(ctx, py, args...)
if err != nil { if err != nil {
// current error message chain is longer: // current error message chain is longer:
// failed to call {pyExec} __non_existing__.py: {pyExec}: can't open // failed to call {pyExec} __non_existing__.py: {pyExec}: can't open
@ -28,7 +30,7 @@ func Py(ctx context.Context, script string, args ...string) (string, error) {
// can't open file '$PWD/__non_existing__.py': [Errno 2] No such file or directory // can't open file '$PWD/__non_existing__.py': [Errno 2] No such file or directory
return "", err return "", err
} }
return trimmedS(out), nil return strings.Trim(string(out), "\n\r"), nil
} }
func createVirtualEnv(ctx context.Context) error { func createVirtualEnv(ctx context.Context) error {
@ -38,11 +40,7 @@ func createVirtualEnv(ctx context.Context) error {
// python3 -m build -w // python3 -m build -w
// https://packaging.python.org/en/latest/tutorials/packaging-projects/ // https://packaging.python.org/en/latest/tutorials/packaging-projects/
func detectVirtualEnv() (string, error) { func detectVirtualEnv(wd string) (string, error) {
wd, err := os.Getwd()
if err != nil {
return "", err
}
wdf, err := os.Open(wd) wdf, err := os.Open(wd)
if err != nil { if err != nil {
return "", err return "", err
@ -74,69 +72,10 @@ func detectExecutable(ctx context.Context) (string, error) {
if pyExec != "" { if pyExec != "" {
return pyExec, nil return pyExec, nil
} }
detector := "which" detected, err := spawn.DetectExecutable(ctx, "python3")
if runtime.GOOS == "windows" {
detector = "where.exe"
}
out, err := execAndPassErr(ctx, detector, "python3")
if err != nil { if err != nil {
return "", err return "", err
} }
pyExec = trimmedS(out) pyExec = detected
return pyExec, nil return pyExec, nil
} }
func execAndPassErr(ctx context.Context, name string, args ...string) ([]byte, error) {
// TODO: move out to a separate package, once we have Maven integration
out, err := exec.CommandContext(ctx, name, args...).Output()
return out, nicerErr(err)
}
func nicerErr(err error) error {
if err == nil {
return nil
}
if ee, ok := err.(*exec.ExitError); ok {
errMsg := trimmedS(ee.Stderr)
if errMsg == "" {
errMsg = err.Error()
}
return errors.New(errMsg)
}
return err
}
func trimmedS(bytes []byte) string {
return strings.Trim(string(bytes), "\n\r")
}
// TrimLeadingWhitespace removes leading whitespace
// function copied from Databricks Terraform provider
func TrimLeadingWhitespace(commandStr string) (newCommand string) {
lines := strings.Split(strings.ReplaceAll(commandStr, "\t", " "), "\n")
leadingWhitespace := 1<<31 - 1
for _, line := range lines {
for pos, char := range line {
if char == ' ' || char == '\t' {
continue
}
// first non-whitespace character
if pos < leadingWhitespace {
leadingWhitespace = pos
}
// is not needed further
break
}
}
for i := 0; i < len(lines); i++ {
if lines[i] == "" || strings.Trim(lines[i], " \t") == "" {
continue
}
if len(lines[i]) < leadingWhitespace {
newCommand += lines[i] + "\n" // or not..
} else {
newCommand += lines[i][leadingWhitespace:] + "\n"
}
}
return
}

View File

@ -6,14 +6,10 @@ import (
"os" "os"
"testing" "testing"
"github.com/databricks/bricks/lib/spawn"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
) )
func TestExecAndPassError(t *testing.T) {
_, err := execAndPassErr(context.Background(), "which", "__non_existing__")
assert.EqualError(t, err, "exit status 1")
}
func TestDetectPython(t *testing.T) { func TestDetectPython(t *testing.T) {
pyExec = "" pyExec = ""
py, err := detectExecutable(context.Background()) py, err := detectExecutable(context.Background())
@ -30,37 +26,25 @@ func TestDetectPythonCache(t *testing.T) {
} }
func TestDetectVirtualEnvFalse(t *testing.T) { func TestDetectVirtualEnvFalse(t *testing.T) {
venvDir, err := detectVirtualEnv() wd, err := os.Getwd()
assert.NoError(t, err)
venvDir, err := detectVirtualEnv(wd)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, "", venvDir) assert.Equal(t, "", venvDir)
} }
func TestMakeDetectableVenv(t *testing.T) { func TestMakeDetectableVenv(t *testing.T) {
var temp string temp := t.TempDir()
defer testTempdir(t, &temp)() ctx := spawn.WithRoot(context.Background(), temp)
// TODO: rewrite with t.TempDir() and arguments err := createVirtualEnv(ctx)
err := createVirtualEnv(context.Background())
assert.NoError(t, err) assert.NoError(t, err)
venv, err := detectVirtualEnv() venv, err := detectVirtualEnv(temp)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, fmt.Sprintf("%s/.venv", temp), venv) assert.Equal(t, fmt.Sprintf("%s/.venv", temp), venv)
} }
func testTempdir(t *testing.T, dir *string) func() {
wd, _ := os.Getwd()
temp, err := os.MkdirTemp(os.TempDir(), "brickstest")
assert.NoError(t, err)
os.Chdir(temp)
wd2, _ := os.Getwd()
*dir = wd2
return func() {
os.Chdir(wd)
os.RemoveAll(temp)
}
}
func TestPyError(t *testing.T) { func TestPyError(t *testing.T) {
_, err := Py(context.Background(), "__non_existing__.py") _, err := Py(context.Background(), "__non_existing__.py")
assert.Contains(t, err.Error(), "can't open file") assert.Contains(t, err.Error(), "can't open file")

View File

@ -13,6 +13,8 @@ import (
// move to go sdk / replace with utility function once // move to go sdk / replace with utility function once
// https://github.com/databricks/databricks-sdk-go/issues/57 is Done // https://github.com/databricks/databricks-sdk-go/issues/57 is Done
// Tracked in https://github.com/databricks/bricks/issues/25 // Tracked in https://github.com/databricks/bricks/issues/25
//
// Deprecated: use w.Dbfs.Overwrite
func CreateDbfsFile(ctx context.Context, func CreateDbfsFile(ctx context.Context,
wsc *workspaces.WorkspacesClient, wsc *workspaces.WorkspacesClient,
path string, path string,
@ -59,6 +61,7 @@ func CreateDbfsFile(ctx context.Context,
return nil return nil
} }
// Deprecated: use w.Dbfs.Open
func ReadDbfsFile(ctx context.Context, func ReadDbfsFile(ctx context.Context,
wsc *workspaces.WorkspacesClient, wsc *workspaces.WorkspacesClient,
path string, path string,