From 8cab9d92f33e929e902c965026b8b7339f42c9f7 Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Mon, 31 Oct 2022 12:09:52 +0100 Subject: [PATCH] `bricks deploy` MVP --- .goreleaser.yaml | 4 +- cmd/build/build.go | 43 +++ cmd/build/build_test.go | 30 ++ cmd/build/exp/main.go | 36 ++ doc/cmd-deploy.md | 4 + doc/configuration.md | 45 +++ doc/project-flavors.md | 13 + doc/project-lifecycle.md | 28 ++ ext/databricks-sdk-go | 2 +- go.mod | 12 +- go.sum | 18 +- lib/dbr/info.go | 88 +++++ lib/fileset/file.go | 73 +++++ lib/fileset/fileset.go | 98 ++++++ lib/flavor/mvn/mvn.go | 98 ++++++ lib/flavor/notebooks/notebooks.go | 41 +++ lib/flavor/pkg.go | 120 +++++++ {python => lib/flavor/py}/env.go | 51 +-- lib/flavor/py/env_test.go | 18 + lib/flavor/py/setup_py.go | 307 ++++++++++++++++++ .../simple-python-wheel/databricks.yml | 0 .../simple-python-wheel/dummy/__init__.py | 0 .../simple-python-wheel/dummy/transforms.py | 0 .../py}/testdata/simple-python-wheel/setup.py | 0 {python => lib/flavor/py}/wheel.go | 61 ++-- {python => lib/flavor/py}/wheel_test.go | 2 +- lib/flavor/script/script.go | 55 ++++ lib/spawn/exec.go | 81 +++++ lib/spawn/exec_test.go | 14 + lib/ui/spinner.go | 53 +++ main.go | 1 + project/config.go | 9 + project/flavor.go | 81 ----- project/project.go | 39 ++- project/project_build.go | 146 +++++++++ project/project_install.go | 134 ++++++++ python/env_test.go | 24 -- python/runner.go | 83 +---- python/runner_test.go | 32 +- utilities/dbfs.go | 3 + 40 files changed, 1678 insertions(+), 269 deletions(-) create mode 100644 cmd/build/build.go create mode 100644 cmd/build/build_test.go create mode 100644 cmd/build/exp/main.go create mode 100644 doc/cmd-deploy.md create mode 100644 doc/configuration.md create mode 100644 doc/project-flavors.md create mode 100644 doc/project-lifecycle.md create mode 100644 lib/dbr/info.go create mode 100644 lib/fileset/file.go create mode 100644 lib/fileset/fileset.go create mode 100644 lib/flavor/mvn/mvn.go create mode 100644 lib/flavor/notebooks/notebooks.go create mode 100644 lib/flavor/pkg.go rename {python => lib/flavor/py}/env.go (66%) create mode 100644 lib/flavor/py/env_test.go create mode 100644 lib/flavor/py/setup_py.go rename {python => lib/flavor/py}/testdata/simple-python-wheel/databricks.yml (100%) rename {python => lib/flavor/py}/testdata/simple-python-wheel/dummy/__init__.py (100%) rename {python => lib/flavor/py}/testdata/simple-python-wheel/dummy/transforms.py (100%) rename {python => lib/flavor/py}/testdata/simple-python-wheel/setup.py (100%) rename {python => lib/flavor/py}/wheel.go (75%) rename {python => lib/flavor/py}/wheel_test.go (97%) create mode 100644 lib/flavor/script/script.go create mode 100644 lib/spawn/exec.go create mode 100644 lib/spawn/exec_test.go create mode 100644 lib/ui/spinner.go delete mode 100644 project/flavor.go create mode 100644 project/project_build.go create mode 100644 project/project_install.go delete mode 100644 python/env_test.go diff --git a/.goreleaser.yaml b/.goreleaser.yaml index d3f426a4..65ffc328 100644 --- a/.goreleaser.yaml +++ b/.goreleaser.yaml @@ -22,7 +22,7 @@ builds: goarch: '386' - goos: linux goarch: '386' - binary: '{{ .ProjectName }}_v{{ .Version }}' + binary: '{{ .ProjectName }}' archives: - format: zip name_template: '{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}' @@ -30,7 +30,7 @@ checksum: name_template: '{{ .ProjectName }}_{{ .Version }}_SHA256SUMS' algorithm: sha256 snapshot: - name_template: '{{ incpatch .Version }}-devel' + name_template: '{{ .ProjectName }}' changelog: sort: asc filters: diff --git a/cmd/build/build.go b/cmd/build/build.go new file mode 100644 index 00000000..56831df6 --- /dev/null +++ b/cmd/build/build.go @@ -0,0 +1,43 @@ +package build + +import ( + "fmt" + + "github.com/databricks/bricks/cmd/root" + "github.com/databricks/bricks/lib/ui" + "github.com/databricks/bricks/project" + "github.com/spf13/cobra" +) + +var buildCmd = &cobra.Command{ + Use: "deploy", + Short: "Build and deploy artifacts", + PreRunE: project.Configure, + RunE: func(cmd *cobra.Command, args []string) error { + ctx := cmd.Context() + prj := project.Get(ctx) + // https://github.com/databrickslabs/mosaic - both maven and python + // https://github.com/databrickslabs/arcuate - only python, no DBR needed, but has notebooks + all, err := prj.LocalArtifacts(ctx) + if err != nil { + return err + } + if len(all) == 0 { + return fmt.Errorf("nothing to deploy") + } + err = ui.SpinStages(ctx, []ui.Stage{ + {InProgress: "Preparing", Callback: prj.Prepare, Complete: "Prepared!"}, + {InProgress: "Building", Callback: prj.Build, Complete: "Built!"}, + {InProgress: "Uploading", Callback: prj.Upload, Complete: "Uploaded!"}, + {InProgress: "Installing", Callback: prj.Install, Complete: "Installed!"}, + }) + if err != nil { + return err + } + return nil + }, +} + +func init() { + root.RootCmd.AddCommand(buildCmd) +} diff --git a/cmd/build/build_test.go b/cmd/build/build_test.go new file mode 100644 index 00000000..667d4dd6 --- /dev/null +++ b/cmd/build/build_test.go @@ -0,0 +1,30 @@ +package build + +import ( + "context" + "os" + "testing" + + "github.com/databricks/bricks/cmd/root" + "github.com/stretchr/testify/assert" +) + +func Test(t *testing.T) { + t.Skip() + ctx := context.Background() + os.Setenv("BRICKS_ROOT", "/Users/serge.smertin/git/labs/transpiler") + root.RootCmd.SetArgs([]string{"deploy"}) + + err := root.RootCmd.ExecuteContext(ctx) + assert.NoError(t, err) +} + +func TestArcuate(t *testing.T) { + t.Skip() + ctx := context.Background() + os.Setenv("BRICKS_ROOT", "/Users/serge.smertin/git/labs/arcuate") + root.RootCmd.SetArgs([]string{"deploy"}) + + err := root.RootCmd.ExecuteContext(ctx) + assert.NoError(t, err) +} diff --git a/cmd/build/exp/main.go b/cmd/build/exp/main.go new file mode 100644 index 00000000..871cf600 --- /dev/null +++ b/cmd/build/exp/main.go @@ -0,0 +1,36 @@ +package main + +import ( + "context" + "fmt" + "os" + "time" + + "github.com/databricks/bricks/lib/ui" +) + +func main() { + ctx := context.Background() + err := ui.SpinStages(ctx, []ui.Stage{ + {InProgress: "Building", Callback: func(ctx context.Context, status func(string)) error { + time.Sleep(1 * time.Second) + status("first message") + time.Sleep(1 * time.Second) + status("second message") + time.Sleep(1 * time.Second) + return nil + }, Complete: "Built!"}, + {InProgress: "Uploading", Callback: func(ctx context.Context, status func(string)) error { + status("third message") + time.Sleep(1 * time.Second) + return nil + }, Complete: "Uploaded!"}, + {InProgress: "Installing", Callback: func(ctx context.Context, status func(string)) error { + time.Sleep(1 * time.Second) + return fmt.Errorf("nope") + }, Complete: "Installed!"}, + }) + if err != nil { + os.Exit(1) + } +} diff --git a/doc/cmd-deploy.md b/doc/cmd-deploy.md new file mode 100644 index 00000000..96a5e7f0 --- /dev/null +++ b/doc/cmd-deploy.md @@ -0,0 +1,4 @@ +Deploying workloads +--- + +`bricks deploy` command is used to deploy a [project](project-lifecycle.md) to a target Databricks Workspace. \ No newline at end of file diff --git a/doc/configuration.md b/doc/configuration.md new file mode 100644 index 00000000..f9a5c1de --- /dev/null +++ b/doc/configuration.md @@ -0,0 +1,45 @@ +Configuration +--- + +There are two main configuration concepts of `bricks` CLI: per-project `databricks.yml` and per-machine `~/.databrickscfg` file. + +## `~/.databrickscfg` + +The purpose of this file is hold connectivity profiles with possibly clear-text credentials to Databricks Workspaces or Databricks Accounts. Almost all entries from this configuration file can be set through environment variables. The same configuration file can be read via the official Databricks GoLang SDK and Databricks Python SDK. Legacy Databricks CLI supports reading only `host`, `token`, `username`, and `password` configuration options. + + * `host` _(string)_: Databricks host for either workspace endpoint or Accounts endpoint. Environment: `DATABRICKS_HOST`. + * `account_id` _(string)_: Databricks Account ID for Accounts endpoint. Environment: `DATABRICKS_ACCOUNT_ID`. + * `token` _(string)_: Personal Access Token (PAT). Environment: `DATABRICKS_TOKEN`. + * `username` _(string)_: Username part of basic authentication. Environment: `DATABRICKS_USERNAME`. + * `password` _(string)_: Password part of basic authentication. Environment: `DATABRICKS_PASSWORD`. + * `profile` _(string)_: Connection profile specified within `~/.databrickscfg`. Environment: `DATABRICKS_CONFIG_PROFILE`. + * `config_file` _(string)_: Location of the Databricks CLI credentials file. By default, it is located in `~/.databrickscfg`. Environment: `DATABRICKS_CONFIG_FILE`. + * `google_service_account` _(string)_: Google Compute Platform (GCP) Service Account e-mail used for impersonation in the Default Application Credentials Flow that does not require a password. Environment: `DATABRICKS_GOOGLE_SERVICE_ACCOUNT` + * `google_credentials` _(string)_: GCP Service Account Credentials JSON or the location of these credentials on the local filesustem. Environment: `GOOGLE_CREDENTIALS`. + * `azure_workspace_resource_id` _(string)_: Azure Resource Manager ID for Azure Databricks workspace, which is exhanged for a Host. Environment: `DATABRICKS_AZURE_RESOURCE_ID`. + * `azure_use_msi` _(string)_: Instruct to use Azure Managed Service Identity passwordless authentication flow for Service Principals. Environment: `ARM_USE_MSI`. + * `azure_client_secret` _(string)_: Azure Active Directory Service Principal secret. Environment: `ARM_CLIENT_SECRET`. + * `azure_client_id` _(string)_: Azure Active Directory Service Principal Application ID. Environment: `ARM_CLIENT_ID` + * `azure_tenant_id` _(string)_: Azure Active Directory Tenant ID. Environment: `ARM_TENANT_ID` + * `azure_environment` _(string)_: Azure Environment (Public, UsGov, China, Germany) has specific set of API endpoints. Defaults to `PUBLIC`. Environment: `ARM_ENVIRONMENT`. + * `auth_type` _(string)_: When multiple auth attributes are available in the environment, use the auth type specified by this argument. This argument also holds currently selected auth. +* `http_timeout_seconds` _(int)_: Number of seconds for HTTP timeout. +* `debug_truncate_bytes` _(int)_: Truncate JSON fields in debug logs above this limit. Default is 96. Environment: `DATABRICKS_DEBUG_TRUNCATE_BYTES` +* `debug_headers` _(bool)_: Debug HTTP headers of requests made by the application. Default is false, as headers contain sensitive data, like tokens. Environment: `DATABRICKS_DEBUG_HEADERS`. + * `rate_limit` _(int)_: Maximum number of requests per second made to Databricks REST API. Environment: `DATABRICKS_RATE_LIMIT` + +## `databricks.yml` + +Frequently, developers work on more than a single project from their workstations. Having a per-project `databricks.yml` configuration file created by [`bricks init`](project-lifecycle.md#init) helps achieving resource isolation and connectivity credentials flexibility. + +### Development Cluster + +Every project, with the exception of several [project flavors](project-flavors.md) may have a Databricks Cluster, where groups or individual data engineers run Spark queries in the Databricks Runtime. It's also possible to [isolate](#isolation-levels) clusters. + +### Project Flavor + +[Project Flavors](project-flavors.md) are the features, that detect the intended behavior of deployments during the [project lifecycle](project-lifecycle.md). + +### Isolation Levels + +It's possible to achieve _soft isolation_ levels for multiple developers to independently work on the same project, like having different branches in Git. \ No newline at end of file diff --git a/doc/project-flavors.md b/doc/project-flavors.md new file mode 100644 index 00000000..d2025247 --- /dev/null +++ b/doc/project-flavors.md @@ -0,0 +1,13 @@ +Project Flavors +--- + +`bricks` CLI detects variout project flavors dynamically every run, though sometimes you may be interested in overriding the defaults. + +## Maven + +If there's a `pom.xml` file in the same folder as [`databricks.yml`](configuration.md), `mvn clean package` is invoked during [`build`](project-lifecycle.md#build) stage, followed by uploading `target/$artifactId-$version.jar` file to DBFS during the [`upload`](project-lifecycle.md#upload) stage, installing it as a library on [Development Cluster](configuration.md#development-cluster) and waiting for the installation to succeed, reporting the error back otherwise. + +## Python + +If there's a `setup.py` file in the [project root](configuration.md), ... + diff --git a/doc/project-lifecycle.md b/doc/project-lifecycle.md new file mode 100644 index 00000000..da208590 --- /dev/null +++ b/doc/project-lifecycle.md @@ -0,0 +1,28 @@ +Project Lifecycle +--- + +Project lifecycle consists of different execution phases. This document aims at describing them as toroughly as possible. + +## `init` + +`bricks init` creates a [`databricks.yml`](configuration.md) file in the directory, where `bricks` CLI was invoked. It walks you through the interactive command prompts. The goal of this stage is to setup project flavor and connectivity to a Databricks workspapce. + +## `prepare` + +`bricks prepare` prepares the local filesystem for the following lifecycle stages, like rolling out the relevant Virtual Environment for [Python projects](project-flavors.md#python). + +## `build` + +`bricks build` triggers the relevant commands to package artifacts, like Java or Scala [JARs](project-flavors.md#maven) or Python [Wheels](project-flavors.md#python). It's also possible to have a multi-flavor project, like [Mosaic](https://github.com/databrickslabs/mosaic), where built Wheel depends on a built JAR. + +## `upload` + +`bricks upload` takes the artifacts created by [`bricks build`](#build) and uploads them to a path following configured [isolation level](configuration.md#isolation-levels). + +## `deploy` + +.. creates clusters + +## `install` + +`bricks install` takes remote paths created by [`bricks upload`](#upload) for artifacts created by [`bricks build`](#build) and installs them on [Development Cluster](configuration.md#development-cluster) following the configured [isolation level](configuration.md#isolation-levels). \ No newline at end of file diff --git a/ext/databricks-sdk-go b/ext/databricks-sdk-go index b719dadd..6cb641a6 160000 --- a/ext/databricks-sdk-go +++ b/ext/databricks-sdk-go @@ -1 +1 @@ -Subproject commit b719dadd27a5cb6c67db0b6ddef5458ec31cc8c0 +Subproject commit 6cb641a6288ad0e4399bcf7cdf0156a12919bdb3 diff --git a/go.mod b/go.mod index 7dea5c30..4b21e326 100644 --- a/go.mod +++ b/go.mod @@ -13,27 +13,31 @@ require ( github.com/spf13/cobra v1.5.0 // Apache 2.0 github.com/stretchr/testify v1.8.0 // MIT github.com/whilp/git-urls v1.0.0 // MIT - golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 // BSD-3-Clause + golang.org/x/mod v0.6.0 // BSD-3-Clause gopkg.in/ini.v1 v1.67.0 // Apache 2.0 ) -require golang.org/x/sync v0.0.0-20220513210516-0976fa681c29 - require ( cloud.google.com/go/compute v1.6.1 // indirect + github.com/briandowns/spinner v1.19.0 github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e // indirect github.com/davecgh/go-spew v1.1.1 // indirect + github.com/fatih/color v1.7.0 github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect github.com/golang/protobuf v1.5.2 // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/inconshreveable/mousetrap v1.0.0 // indirect github.com/kr/text v0.2.0 // indirect + github.com/mattn/go-colorable v0.1.2 // indirect + github.com/mattn/go-isatty v0.0.8 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/spf13/pflag v1.0.5 // indirect go.opencensus.io v0.23.0 // indirect + golang.org/x/exp v0.0.0-20221028150844-83b7d23a625f golang.org/x/net v0.0.0-20220526153639-5463443f8c37 // indirect golang.org/x/oauth2 v0.0.0-20220628200809-02e64fa58f26 // indirect - golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a // indirect + golang.org/x/sync v0.0.0-20220513210516-0976fa681c29 + golang.org/x/sys v0.1.0 // indirect golang.org/x/text v0.3.7 // indirect golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect google.golang.org/api v0.82.0 // indirect diff --git a/go.sum b/go.sum index 31e98125..9870a72e 100644 --- a/go.sum +++ b/go.sum @@ -57,6 +57,8 @@ github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAE github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= +github.com/briandowns/spinner v1.19.0 h1:s8aq38H+Qju89yhp89b4iIiMzMm8YN3p6vGpwyh/a8E= +github.com/briandowns/spinner v1.19.0/go.mod h1:mQak9GHqbspjC/5iUx3qMlIho8xBS/ppAL/hX5SmPJU= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= @@ -91,6 +93,8 @@ github.com/envoyproxy/go-control-plane v0.9.9-0.20210512163311-63b5d3c536b0/go.m github.com/envoyproxy/go-control-plane v0.9.10-0.20210907150352-cf90f659a021/go.mod h1:AFq3mo9L8Lqqiid3OhADV3RfLJnjiw63cSpi+fDTRC0= github.com/envoyproxy/go-control-plane v0.10.2-0.20220325020618-49ff273808a1/go.mod h1:KJwIaB5Mv44NWtYuAOFCVOjcI94vtpEz2JU/D2v6IjE= github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= +github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys= +github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/ghodss/yaml v1.0.0 h1:wQHKEahhL6wmXdzwWG11gIVCkOv05bNOh+Rxn0yngAk= github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= @@ -195,6 +199,10 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/manifoldco/promptui v0.9.0 h1:3V4HzJk1TtXW1MTZMP7mdlwbBpIinw3HztaIlYthEiA= github.com/manifoldco/promptui v0.9.0/go.mod h1:ka04sppxSGFAtxX0qhlYQjISsg9mR4GWtQEhdbn6Pgg= +github.com/mattn/go-colorable v0.1.2 h1:/bC9yWikZXAL9uJdulbSfyVNIR3n3trXl+v8+1sx8mU= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-isatty v0.0.8 h1:HLtExJ+uU2HOZ+wI0Tt5DtUDrx8yhUqDcp7fYERX4CE= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 h1:KoWmjvw+nsYOo29YJK9vDA65RGE3NrOnUtO7a+RF9HU= @@ -252,6 +260,8 @@ golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u0 golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4= golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM= golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU= +golang.org/x/exp v0.0.0-20221028150844-83b7d23a625f h1:Al51T6tzvuh3oiwX11vex3QgJ2XTedFPGmbEVh8cdoc= +golang.org/x/exp v0.0.0-20221028150844-83b7d23a625f/go.mod h1:CxIveKay+FTh1D0yPZemJVgC/95VzuuOLq5Qi4xnoYc= golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js= golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= @@ -277,8 +287,8 @@ golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 h1:kQgndtyPBW/JIYERgdxfwMYh3AVStj88WQTlNDi2a+o= -golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3/go.mod h1:3p9vT2HGsQu2K1YbXdKPJLVgG5VJdoTa1poYQBtP1AY= +golang.org/x/mod v0.6.0 h1:b9gGHsz9/HhJ3HF5DHQytPpuwocVTChQJK3AvoLRD5I= +golang.org/x/mod v0.6.0/go.mod h1:4mET923SAdbXp2ki8ey+zGs1SLqsuM2Y0uvdZR/fUNI= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -359,6 +369,7 @@ golang.org/x/sync v0.0.0-20220513210516-0976fa681c29/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20181122145206-62eef0e2fa9b/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -413,8 +424,9 @@ golang.org/x/sys v0.0.0-20220227234510-4e6760a101f9/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20220328115105-d36c6a25d886/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220412211240-33da011f77ad/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220502124256-b6088ccd6cba/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a h1:dGzPydgVsqGcTRVwiLJ1jVbufYwmzD3LfVPLKsKg+0k= golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/lib/dbr/info.go b/lib/dbr/info.go new file mode 100644 index 00000000..0f56aab2 --- /dev/null +++ b/lib/dbr/info.go @@ -0,0 +1,88 @@ +package dbr + +import ( + "context" + "encoding/json" + "fmt" + + "github.com/databricks/databricks-sdk-go/retries" + "github.com/databricks/databricks-sdk-go/service/clusters" + "github.com/databricks/databricks-sdk-go/workspaces" +) + +type RuntimeInfo struct { + Name string `json:"name"` + SparkVersion string `json:"spark_version"` + PythonVersion string `json:"python_version"` + PyPI []Package `json:"pypi"` + Jars []Package `json:"jars"` +} + +type Package struct { + Group string `json:"group,omitempty"` + Name string `json:"name"` + Version string `json:"version"` +} + +func (pkg *Package) PyPiName() string { + return fmt.Sprintf("%s==%s", pkg.Name, pkg.Version) +} + +func GetRuntimeInfo(ctx context.Context, w *workspaces.WorkspacesClient, + clusterId string, status func(string)) (*RuntimeInfo, error) { + cluster, err := w.Clusters.GetByClusterId(ctx, clusterId) + if err != nil { + return nil, err + } + if !cluster.IsRunningOrResizing() { + _, err = w.Clusters.StartByClusterIdAndWait(ctx, clusterId, + func(i *retries.Info[clusters.ClusterInfo]) { + status(i.Info.StateMessage) + }) + if err != nil { + return nil, err + } + } + command := w.CommandExecutor.Execute(ctx, clusterId, "python", infoScript) + if command.Failed() { + return nil, command.Err() + } + var info RuntimeInfo + err = json.Unmarshal([]byte(command.Text()), &info) + if err != nil { + return nil, err + } + return &info, nil +} + +const infoScript = `import pkg_resources, json, sys, platform, subprocess +from pyspark.version import __version__ + +jars = [] +for j in subprocess.check_output(['ls', '-1', '/databricks/jars']).decode().split("\n"): + if '--mvn--' in j or '--maven-trees--' in j: + split = j.split('--')[-1][:-4].split('__') + if not len(split) == 3: + continue + group, artifactId, version = split + jars.append({ + 'group': group, + 'name': artifactId, + 'version': version, + }) +jars = sorted(jars, key=lambda jar: (jar['group'], jar['name'])) + +python_packages = [ + {"name": n, "version": v} + for n, v in sorted([(i.key, i.version) + for i in pkg_resources.working_set])] +python_packages = sorted(python_packages, key=lambda x: x['name']) + +runtime = spark.conf.get('spark.databricks.clusterUsageTags.sparkVersion') +print(json.dumps({ + 'name': runtime, + 'spark_version': __version__[0:5], + 'python_version': platform.python_version(), + 'pypi': python_packages, + 'jars': jars, +}))` diff --git a/lib/fileset/file.go b/lib/fileset/file.go new file mode 100644 index 00000000..a98b266f --- /dev/null +++ b/lib/fileset/file.go @@ -0,0 +1,73 @@ +package fileset + +import ( + "io" + "io/fs" + "log" + "os" + "path" + "regexp" + "strings" + "time" +) + +type File struct { + fs.DirEntry + Absolute string + Relative string +} + +func (f File) Modified() (ts time.Time) { + info, err := f.Info() + if err != nil { + // return default time, beginning of epoch + return ts + } + return info.ModTime() +} + +func (fi File) Ext(suffix string) bool { + return strings.HasSuffix(fi.Name(), suffix) +} + +func (fi File) Dir() string { + return path.Dir(fi.Absolute) +} + +func (fi File) MustMatch(needle string) bool { + return fi.Match(regexp.MustCompile(needle)) +} + +func (fi File) FindAll(needle *regexp.Regexp) (all []string, err error) { + raw, err := fi.Raw() + if err != nil { + log.Printf("[ERROR] read %s: %s", fi.Absolute, err) + return nil, err + } + for _, v := range needle.FindAllStringSubmatch(string(raw), -1) { + all = append(all, v[1]) + } + return all, nil +} + +func (fi File) Match(needle *regexp.Regexp) bool { + raw, err := fi.Raw() + if err != nil { + log.Printf("[ERROR] read %s: %s", fi.Absolute, err) + return false + } + return needle.Match(raw) +} + +func (fi File) Raw() ([]byte, error) { + f, err := fi.Open() + if err != nil { + return nil, err + } + return io.ReadAll(f) +} + +func (fi File) Open() (*os.File, error) { + return os.Open(fi.Absolute) +} + diff --git a/lib/fileset/fileset.go b/lib/fileset/fileset.go new file mode 100644 index 00000000..7aa31b33 --- /dev/null +++ b/lib/fileset/fileset.go @@ -0,0 +1,98 @@ +package fileset + +import ( + "fmt" + "os" + "path" + "path/filepath" + "regexp" + "strings" +) + +type FileSet []File + +func (fi FileSet) Root() string { + return strings.TrimSuffix( + strings.ReplaceAll(fi[0].Absolute, fi[0].Relative, ""), + "/") +} + +func (fi FileSet) FirstMatch(pathRegex, needleRegex string) *File { + path := regexp.MustCompile(pathRegex) + needle := regexp.MustCompile(needleRegex) + for _, v := range fi { + if !path.MatchString(v.Absolute) { + continue + } + if v.Match(needle) { + return &v + } + } + return nil +} + +func (fi FileSet) FindAll(pathRegex, needleRegex string) (map[File][]string, error) { + path := regexp.MustCompile(pathRegex) + needle := regexp.MustCompile(needleRegex) + all := map[File][]string{} + for _, v := range fi { + if !path.MatchString(v.Absolute) { + continue + } + vall, err := v.FindAll(needle) + if err != nil { + return nil, fmt.Errorf("%s: %w", v.Relative, err) + } + all[v] = vall + } + return all, nil +} + +func (fi FileSet) Exists(pathRegex, needleRegex string) bool { + m := fi.FirstMatch(pathRegex, needleRegex) + return m != nil +} + +func RecursiveChildren(dir, root string) (found FileSet, err error) { + // TODO: add options to skip, like current.Name() == "vendor" + queue, err := ReadDir(dir, root) + if err != nil { + return nil, err + } + for len(queue) > 0 { + current := queue[0] + queue = queue[1:] + if !current.IsDir() { + current.Relative = strings.ReplaceAll(current.Absolute, dir+"/", "") + found = append(found, current) + continue + } + children, err := ReadDir(current.Absolute, root) + if err != nil { + return nil, err + } + queue = append(queue, children...) + } + return found, nil +} + +func ReadDir(dir, root string) (queue []File, err error) { + f, err := os.Open(dir) + if err != nil { + return + } + defer f.Close() + dirs, err := f.ReadDir(-1) + if err != nil { + return + } + for _, v := range dirs { + absolute := path.Join(dir, v.Name()) + relative, err := filepath.Rel(root, absolute) + if err != nil { + return nil, err + } + queue = append(queue, File{v, absolute, relative}) + } + return +} diff --git a/lib/flavor/mvn/mvn.go b/lib/flavor/mvn/mvn.go new file mode 100644 index 00000000..1c464993 --- /dev/null +++ b/lib/flavor/mvn/mvn.go @@ -0,0 +1,98 @@ +package mvn + +import ( + "context" + "encoding/xml" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/databricks/bricks/lib/flavor" + "github.com/databricks/bricks/lib/spawn" + "github.com/databricks/databricks-sdk-go/service/libraries" +) + +type Pom struct { + Name string `xml:"name"` + GroupID string `xml:"groupId"` + ArtifactID string `xml:"artifactId"` + Version string `xml:"version"` +} + +func (pom *Pom) Jar() string { + return fmt.Sprintf("%s-%s.jar", pom.ArtifactID, pom.Version) +} + +type Maven struct { + SkipTests bool `json:"skip_tests,omitempty"` +} + +func (mvn *Maven) RequiresCluster() bool { + return true +} + +// Java libraries always require cluster restart +func (mvn *Maven) RequiresRestart() bool { + return true +} + +func (mvn *Maven) Detected(prj flavor.Project) bool { + _, err := os.Stat(filepath.Join(prj.Root(), "pom.xml")) + return err == nil +} + +func (mvn *Maven) LocalArtifacts(ctx context.Context, prj flavor.Project) (flavor.Artifacts, error) { + pom, err := mvn.Pom(prj.Root()) + if err != nil { + return nil, err + } + return flavor.Artifacts{ + { + Flavor: mvn, + Library: libraries.Library{ + Jar: fmt.Sprintf("%s/target/%s", prj.Root(), pom.Jar()), + }, + }, + }, nil +} + +func (mvn *Maven) Pom(root string) (*Pom, error) { + // TODO: perhaps we should call effective-pom, specially once + // we start comparing local spark version and the one on DBR + pomFile := fmt.Sprintf("%s/pom.xml", root) + pomHandle, err := os.Open(pomFile) + if err != nil { + return nil, fmt.Errorf("open %s: %w", pomFile, err) + } + pomBytes, err := io.ReadAll(pomHandle) + if err != nil { + return nil, fmt.Errorf("read %s: %w", pomFile, err) + } + var pom Pom + err = xml.Unmarshal(pomBytes, &pom) + if err != nil { + return nil, fmt.Errorf("parse %s: %w", pomFile, err) + } + return &pom, nil +} + +func (mvn *Maven) Build(ctx context.Context, prj flavor.Project, status func(string)) error { + mavenPath, err := spawn.DetectExecutable(ctx, "mvn") + if err != nil { + return fmt.Errorf("no Maven installed: %w", err) + } + // report back the name of the JAR to the user + pom, _ := mvn.Pom(prj.Root()) + status(fmt.Sprintf("Buidling %s", pom.Jar())) + args := []string{fmt.Sprintf("--file=%s/pom.xml", prj.Root())} + args = append(args, "-DskipTests=true") + args = append(args, "clean", "package") + _, err = spawn.ExecAndPassErr(ctx, mavenPath, args...) + if err != nil { + // TODO: figure out error reporting in a generic way + // one of the options is to re-run the same command with stdout forwarding + return fmt.Errorf("mvn package: %w", err) + } + return nil +} diff --git a/lib/flavor/notebooks/notebooks.go b/lib/flavor/notebooks/notebooks.go new file mode 100644 index 00000000..10b6ddd0 --- /dev/null +++ b/lib/flavor/notebooks/notebooks.go @@ -0,0 +1,41 @@ +package notebooks + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "github.com/databricks/bricks/lib/fileset" + "github.com/databricks/bricks/lib/flavor" +) + +type Notebooks struct { + Folder string `json:"folder"` +} + +func (n *Notebooks) Detected(prj flavor.Project) bool { + _, err := os.Stat(filepath.Join(prj.Root(), n.Folder)) + return err == nil +} + +func (n *Notebooks) LocalArtifacts(ctx context.Context, prj flavor.Project) (flavor.Artifacts, error) { + all := flavor.Artifacts{} + found, err := fileset.RecursiveChildren(filepath.Join(prj.Root(), n.Folder), prj.Root()) + if err != nil { + return nil, fmt.Errorf("list notebooks: %w", err) + } + for _, f := range found { + if !f.MustMatch("# Databricks notebook source") { + continue + } + all = append(all, flavor.Artifact{ + Notebook: &flavor.Notebook{ + LocalAbsolute: f.Absolute, + RemoteRelative: f.Relative, // TODO: TBD behavior with regards to isolation + }, + Flavor: n, + }) + } + return all, nil +} diff --git a/lib/flavor/pkg.go b/lib/flavor/pkg.go new file mode 100644 index 00000000..895ba3c0 --- /dev/null +++ b/lib/flavor/pkg.go @@ -0,0 +1,120 @@ +package flavor + +import ( + "context" + "path/filepath" + "strings" + + "github.com/databricks/databricks-sdk-go/service/libraries" + "github.com/databricks/databricks-sdk-go/service/workspace" + "github.com/databricks/databricks-sdk-go/workspaces" +) + +type Project interface { + Root() string + WorkspacesClient() *workspaces.WorkspacesClient + GetDevelopmentClusterId(ctx context.Context) (clusterId string, err error) +} + +type Flavor interface { + // Detected returns true on successful metadata checks + Detected(Project) bool + + // LocalArtifacts show (cached) relevant files that _should_ exist + // on local filesystem + LocalArtifacts(context.Context, Project) (Artifacts, error) +} + +type Notebook struct { + LocalAbsolute string + RemoteRelative string +} + +type Artifact struct { + libraries.Library + Notebook *Notebook + Flavor Flavor +} + +type notebookLanguageFormat struct { + Language workspace.ImportLanguage + Format workspace.ImportFormat + Overwrite bool +} + +var extMap = map[string]notebookLanguageFormat{ + ".scala": {"SCALA", "SOURCE", true}, + ".py": {"PYTHON", "SOURCE", true}, + ".sql": {"SQL", "SOURCE", true}, + ".r": {"R", "SOURCE", true}, + ".dbc": {"", "DBC", false}, +} + +func (a Artifact) IsLibrary() bool { + return a.Library.String() != "unknown" +} + +func (a Artifact) NotebookInfo() (*notebookLanguageFormat, bool) { + if a.Notebook == nil { + return nil, false + } + ext := strings.ToLower(filepath.Ext(a.Notebook.LocalAbsolute)) + f, ok := extMap[ext] + return &f, ok +} + +type Kind int + +const ( + LocalNotebook Kind = iota + LocalJar + LocalWheel + LocalEgg + RegistryLibrary +) + +func (k Kind) RequiresBuild() bool { + switch k { + case LocalJar, LocalWheel, LocalEgg: + return true + default: + return false + } +} + +func (a Artifact) KindAndLocation() (Kind, string) { + if a.Notebook != nil { + return LocalNotebook, a.Notebook.LocalAbsolute + } + if a.Jar != "" { + return LocalJar, a.Jar + } + if a.Whl != "" { + return LocalWheel, a.Whl + } + if a.Egg != "" { + return LocalEgg, a.Egg + } + return RegistryLibrary, "" +} + +type Artifacts []Artifact + +func (a Artifacts) RequiresBuild() bool { + for _, v := range a { + k, _ := v.KindAndLocation() + if k.RequiresBuild() { + return true + } + } + return false +} + +func (a Artifacts) HasLibraries() bool { + for _, v := range a { + if v.IsLibrary() { + return true + } + } + return false +} diff --git a/python/env.go b/lib/flavor/py/env.go similarity index 66% rename from python/env.go rename to lib/flavor/py/env.go index 2032c5b1..0ea17c4e 100644 --- a/python/env.go +++ b/lib/flavor/py/env.go @@ -1,12 +1,14 @@ -package python +package py import ( "context" "encoding/json" "fmt" "log" + "regexp" "strings" + "github.com/databricks/bricks/python" "golang.org/x/mod/semver" ) @@ -14,7 +16,11 @@ type Dependency struct { Name string Operator string Version string - Location string // @ file:///usr/loca + Location string // @ file:///usr/local +} + +func (d Dependency) NormalizedName() string { + return strings.ToLower(d.Name) } func (d Dependency) CanonicalVersion() string { @@ -24,33 +30,25 @@ func (d Dependency) CanonicalVersion() string { type Environment []Dependency func (e Environment) Has(name string) bool { + dep := DependencyFromSpec(name) for _, d := range e { - if d.Name == name { + if d.NormalizedName() == dep.NormalizedName() { return true } } return false } -func Freeze(ctx context.Context) (Environment, error) { - out, err := Py(ctx, "-m", "pip", "freeze") - if err != nil { - return nil, err - } - env := Environment{} - deps := strings.Split(out, "\n") - for _, raw := range deps { - env = append(env, DependencyFromSpec(raw)) - } - return env, nil -} - func DependencyFromSpec(raw string) (d Dependency) { + raw = strings.ToLower(strings.TrimSpace(raw)) // TODO: write a normal parser for this + // see https://peps.python.org/pep-0508/#grammar rawSplit := strings.Split(raw, "==") if len(rawSplit) != 2 { log.Printf("[DEBUG] Skipping invalid dep: %s", raw) - return + return Dependency{ + Name: raw, + } } d.Name = rawSplit[0] d.Operator = "==" @@ -65,6 +63,7 @@ type Distribution struct { Version string `json:"version"` Packages []string `json:"packages"` InstallRequires []string `json:"install_requires,omitempty"` + TestsRequire []string `json:"tests_require,omitempty"` } // InstallEnvironment returns only direct install dependencies @@ -75,18 +74,30 @@ func (d Distribution) InstallEnvironment() (env Environment) { return } +// See: ttps://peps.python.org/pep-0503/#normalized-names +var pep503 = regexp.MustCompile(`[-_.]+`) + // NormalizedName returns PEP503-compatible Python Package Index project name. // As per PEP 426 the only valid characters in a name are the ASCII alphabet, // ASCII numbers, ., -, and _. The name should be lowercased with all runs of // the characters ., -, or _ replaced with a single - character. func (d Distribution) NormalizedName() string { - // TODO: implement https://peps.python.org/pep-0503/#normalized-names - return d.Name + return pep503.ReplaceAllString(d.Name, "-") +} + +// See: https://peps.python.org/pep-0491/#escaping-and-unicode +var pep491 = regexp.MustCompile(`[^\w\d.]+`) + +func (d Distribution) WheelName() string { + // Each component of the filename is escaped by replacing runs + // of non-alphanumeric characters with an underscore _ + distName := pep491.ReplaceAllString(d.NormalizedName(), "_") + return fmt.Sprintf("%s-%s-py3-none-any.whl", distName, d.Version) } // ReadDistribution "parses" metadata from setup.py file. func ReadDistribution(ctx context.Context) (d Distribution, err error) { - out, err := PyInline(ctx, ` + out, err := python.PyInline(ctx, ` import setuptools, json, sys setup_config = {} # actual args for setuptools.dist.Distribution def capture(**kwargs): global setup_config; setup_config = kwargs diff --git a/lib/flavor/py/env_test.go b/lib/flavor/py/env_test.go new file mode 100644 index 00000000..a0e0ea4e --- /dev/null +++ b/lib/flavor/py/env_test.go @@ -0,0 +1,18 @@ +package py + +import ( + "context" + "testing" + + "github.com/databricks/bricks/lib/spawn" + "github.com/stretchr/testify/assert" +) + +func TestPyInlineX(t *testing.T) { + ctx := spawn.WithRoot(context.Background(), "testdata/simple-python-wheel") + dist, err := ReadDistribution(ctx) + assert.NoError(t, err) + assert.Equal(t, "dummy", dist.Name) + assert.Equal(t, "dummy", dist.Packages[0]) + assert.True(t, dist.InstallEnvironment().Has("requests")) +} diff --git a/lib/flavor/py/setup_py.go b/lib/flavor/py/setup_py.go new file mode 100644 index 00000000..e1baf402 --- /dev/null +++ b/lib/flavor/py/setup_py.go @@ -0,0 +1,307 @@ +package py + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "io/fs" + "os" + "path/filepath" + "strings" + + "golang.org/x/exp/slices" + + "github.com/databricks/bricks/lib/dbr" + "github.com/databricks/bricks/lib/flavor" + "github.com/databricks/bricks/lib/spawn" + "github.com/databricks/bricks/python" + "github.com/databricks/databricks-sdk-go/databricks/apierr" + "github.com/databricks/databricks-sdk-go/service/commands" + "github.com/databricks/databricks-sdk-go/service/libraries" +) + +type SetupDotPy struct { + SetupPy string `json:"setup_py,omitempty"` + MirrorLibraries bool `json:"mirror_libraries,omitempty"` + + venv string + wheelName string +} + +func (s *SetupDotPy) RequiresCluster() bool { + return true +} + +// Python libraries do not require a restart +func (s *SetupDotPy) RequiresRestart() bool { + return false +} + +func (s *SetupDotPy) setupPyLoc(prj flavor.Project) string { + if s.SetupPy == "" { + s.SetupPy = "setup.py" + } + return filepath.Join(prj.Root(), s.SetupPy) +} + +// We detect only setuptools build backend for now. Hatchling, PDM, +// and Flit _might_ be added in some distant future. +// +// See: https://packaging.python.org/en/latest/tutorials/packaging-projects/ +func (s *SetupDotPy) Detected(prj flavor.Project) bool { + _, err := os.Stat(s.setupPyLoc(prj)) + return err == nil +} + +// readDistribution "parses" metadata from setup.py file from context +// of current project root and virtual env +func (s *SetupDotPy) readDistribution(ctx context.Context, prj flavor.Project) (d Distribution, err error) { + ctx = spawn.WithRoot(ctx, filepath.Dir(s.setupPyLoc(prj))) + out, err := python.Py(ctx, "-c", commands.TrimLeadingWhitespace(` + import setuptools, json, sys + setup_config = {} # actual args for setuptools.dist.Distribution + def capture(**kwargs): global setup_config; setup_config = kwargs + setuptools.setup = capture + import setup + json.dump(setup_config, sys.stdout)`)) + if err != nil { + return + } + err = json.Unmarshal([]byte(out), &d) + return +} + +func (s *SetupDotPy) Prepare(ctx context.Context, prj flavor.Project, status func(string)) error { + venv, err := s.fastDetectVirtualEnv(prj.Root()) + if err != nil { + return err + } + s.venv = venv + if s.venv == "" { + // this allows CLI to be usable in existing projects with existing virtualenvs + venv = filepath.Join(prj.Root(), ".databricks") // TODO: integrate with pipenv + err := os.MkdirAll(venv, 0o700) + if err != nil { + return fmt.Errorf("mk venv: %w", err) + } + status(fmt.Sprintf("Creating virtualenv in %s", venv)) + _, err = python.Py(ctx, "-m", "venv", venv) + if err != nil { + return fmt.Errorf("create venv: %w", err) + } + s.venv = venv + status("Upgrading pip") + _, err = s.Pip(ctx, "install", "--upgrade", "pip", "wheel") + if err != nil { + return fmt.Errorf("upgrade pip: %w", err) + } + } + env, err := s.Freeze(ctx) + if err != nil { + return fmt.Errorf("pip freeze: %s", err) + } + var remotePkgs []string + d, err := s.readDistribution(ctx, prj) + if err != nil { + return fmt.Errorf("setup.py: %s", err) + } + if s.MirrorLibraries { + // TODO: name `MirrorLibraries` is TBD + // TODO: must be part of init command survey + status("Fetching remote libraries") + remoteInfo, err := s.runtimeInfo(ctx, prj, status) + if err != nil && errors.As(err, &apierr.APIError{}) { + return err + } + // TODO: check Spark compatibility with locall install_requires + // TODO: check Python version compatibility with local virtualenv + if err != errNoCluster { + skipLibs := []string{ + "dbus-python", + "distro-info", + "pip", + "psycopg2", + "pygobject", + "python-apt", + "requests-unixsocket", + "setuptools", + "unattended-upgrades", + "wheel", + } + PYPI: + for _, pkg := range remoteInfo.PyPI { + if env.Has(pkg.PyPiName()) { + continue + } + if pkg.Name == d.NormalizedName() { + // skip installing self + continue + } + for _, skip := range skipLibs { + if skip == pkg.Name { + continue PYPI + } + } + remotePkgs = append(remotePkgs, pkg.PyPiName()) + } + } + } + type depList struct { + name string + packages []string + } + dbrDepsName := "remote cluster" + dependencyLists := []depList{ + {dbrDepsName, remotePkgs}, + {"install_requires", d.InstallRequires}, + {"tests_require", d.TestsRequire}, + } + for _, deps := range dependencyLists { + for _, dep := range deps.packages { + if env.Has(dep) { + continue + } + status(fmt.Sprintf("Installing %s in virtualenv (%s)", dep, deps.name)) + _, err = s.Pip(ctx, "install", "--prefer-binary", dep) + if err != nil && deps.name == dbrDepsName && + strings.Contains(err.Error(), "Could not find a version") { + continue + } + if err != nil { + return fmt.Errorf("%s: %w", dep, err) + } + // repeatedly run pip freeze so that we potentially have less installs + env, err = s.Freeze(ctx) + if err != nil { + return fmt.Errorf("pip freeze: %s", err) + } + } + } + return nil +} + +var errNoCluster = errors.New("no development cluster") + +func (s *SetupDotPy) runtimeInfo(ctx context.Context, prj flavor.Project, + status func(string)) (*dbr.RuntimeInfo, error) { + clusterId, err := prj.GetDevelopmentClusterId(ctx) + if err != nil && errors.As(err, &apierr.APIError{}) { + return nil, err + } + if err != nil { + return nil, errNoCluster + } + return dbr.GetRuntimeInfo(ctx, prj.WorkspacesClient(), clusterId, status) +} + +func (s *SetupDotPy) Freeze(ctx context.Context) (Environment, error) { + out, err := s.Pip(ctx, "freeze") + if err != nil { + return nil, err + } + env := Environment{} + deps := strings.Split(out, "\n") + for _, raw := range deps { + env = append(env, DependencyFromSpec(raw)) + } + return env, nil +} + +func (s *SetupDotPy) LocalArtifacts(ctx context.Context, prj flavor.Project) (flavor.Artifacts, error) { + dist, err := s.readDistribution(ctx, prj) + if err != nil { + return nil, err + } + all := flavor.Artifacts{} + // install dependencies for the wheel to run + for _, dependency := range dist.InstallRequires { + if strings.HasPrefix(dependency, "pyspark") { + // pyspark will conflict with DBR + continue + } + all = append(all, flavor.Artifact{ + Flavor: s, + Library: libraries.Library{ + Pypi: &libraries.PythonPyPiLibrary{ + Package: dependency, + }, + }, + }) + } + s.wheelName = dist.WheelName() + all = append(all, flavor.Artifact{ + Flavor: s, + Library: libraries.Library{ + Whl: fmt.Sprintf("%s/.databricks/dist/%s", prj.Root(), s.wheelName), + }, + }) + return all, nil +} + +// Build creates a python wheel, while keeping project root in a clean state, removing the need +// to execute rm -fr dist build *.egg-info after each build +func (s *SetupDotPy) Build(ctx context.Context, prj flavor.Project, status func(string)) error { + status(fmt.Sprintf("Building %s", s.wheelName)) + ctx = spawn.WithRoot(ctx, filepath.Dir(s.setupPyLoc(prj))) + _, err := s.Py(ctx, "setup.py", + // see https://github.com/pypa/setuptools/blob/main/setuptools/_distutils/command/build.py#L23-L31 + "build", "--build-lib=.databricks/build/lib", "--build-base=.databricks/build", + // see https://github.com/pypa/setuptools/blob/main/setuptools/command/egg_info.py#L167-L168 + "egg_info", "--egg-base=.databricks", + // see https://github.com/pypa/wheel/blob/main/src/wheel/bdist_wheel.py#L140 + "bdist_wheel", "--dist-dir=.databricks/dist") + return err +} + +// Py calls project-specific Python interpreter from the virtual env from project root dir +func (s *SetupDotPy) Py(ctx context.Context, args ...string) (string, error) { + if s.venv == "" { + return "", fmt.Errorf("virtualenv not detected") + } + out, err := spawn.ExecAndPassErr(ctx, fmt.Sprintf("%s/bin/python3", s.venv), args...) + if err != nil { + // current error message chain is longer: + // failed to call {pyExec} __non_existing__.py: {pyExec}: can't open + // ... file '{pwd}/__non_existing__.py': [Errno 2] No such file or directory" + // probably we'll need to make it shorter: + // can't open file '$PWD/__non_existing__.py': [Errno 2] No such file or directory + return "", err + } + return strings.Trim(string(out), "\n\r"), nil +} + +func (s *SetupDotPy) Pip(ctx context.Context, args ...string) (string, error) { + return s.Py(ctx, append([]string{"-m", "pip"}, args...)...) +} + +// fastDetectVirtualEnv performs very quick detection, by running over top level directories only +func (s *SetupDotPy) fastDetectVirtualEnv(root string) (string, error) { + wdf, err := os.Open(root) + if err != nil { + return "", err + } + files, err := wdf.ReadDir(0) + if err != nil { + return "", err + } + // virtual env is most likely in dot-directory + slices.SortFunc(files, func(a, b fs.DirEntry) bool { + return a.Name() < b.Name() + }) + for _, v := range files { + if !v.IsDir() { + continue + } + candidate := fmt.Sprintf("%s/%s", root, v.Name()) + _, err = os.Stat(fmt.Sprintf("%s/pyvenv.cfg", candidate)) + if errors.Is(err, os.ErrNotExist) { + continue + } + if err != nil { + return "", err + } + return candidate, nil + } + return "", nil +} diff --git a/python/testdata/simple-python-wheel/databricks.yml b/lib/flavor/py/testdata/simple-python-wheel/databricks.yml similarity index 100% rename from python/testdata/simple-python-wheel/databricks.yml rename to lib/flavor/py/testdata/simple-python-wheel/databricks.yml diff --git a/python/testdata/simple-python-wheel/dummy/__init__.py b/lib/flavor/py/testdata/simple-python-wheel/dummy/__init__.py similarity index 100% rename from python/testdata/simple-python-wheel/dummy/__init__.py rename to lib/flavor/py/testdata/simple-python-wheel/dummy/__init__.py diff --git a/python/testdata/simple-python-wheel/dummy/transforms.py b/lib/flavor/py/testdata/simple-python-wheel/dummy/transforms.py similarity index 100% rename from python/testdata/simple-python-wheel/dummy/transforms.py rename to lib/flavor/py/testdata/simple-python-wheel/dummy/transforms.py diff --git a/python/testdata/simple-python-wheel/setup.py b/lib/flavor/py/testdata/simple-python-wheel/setup.py similarity index 100% rename from python/testdata/simple-python-wheel/setup.py rename to lib/flavor/py/testdata/simple-python-wheel/setup.py diff --git a/python/wheel.go b/lib/flavor/py/wheel.go similarity index 75% rename from python/wheel.go rename to lib/flavor/py/wheel.go index 61e2a792..eebfa0fd 100644 --- a/python/wheel.go +++ b/lib/flavor/py/wheel.go @@ -1,26 +1,23 @@ -package python +package py import ( "context" "fmt" - "io" "log" "os" "path" "strings" - "github.com/databricks/bricks/project" - "github.com/databricks/bricks/utilities" + "github.com/databricks/bricks/python" ) func BuildWheel(ctx context.Context, dir string) (string, error) { - defer chdirAndBack(dir)() // remove previous dist leak os.RemoveAll("dist") // remove all other irrelevant traces silentlyCleanupWheelFolder(".") // call simple wheel builder. we may need to pip install wheel as well - out, err := Py(ctx, "setup.py", "bdist_wheel") + out, err := python.Py(ctx, "setup.py", "bdist_wheel") if err != nil { return "", err } @@ -50,7 +47,6 @@ func UploadWheelToDBFSWithPEP503(ctx context.Context, dir string) (string, error if err != nil { return "", err } - defer chdirAndBack(dir)() dist, err := ReadDistribution(ctx) if err != nil { return "", err @@ -61,28 +57,29 @@ func UploadWheelToDBFSWithPEP503(ctx context.Context, dir string) (string, error // PEP503 indexes can be rolled out to clusters via checksummed global init script, that creates // a driver/worker `/etc/pip.conf` with FUSE-mounted file:///dbfs/FileStore/wheels/simple/.. // extra index URLs. See more pointers at https://stackoverflow.com/q/30889494/277035 - dbfsLoc := fmt.Sprintf("%s/%s/%s", DBFSWheelLocation, dist.NormalizedName(), path.Base(wheel)) + _ = fmt.Sprintf("%s/%s/%s", DBFSWheelLocation, dist.NormalizedName(), path.Base(wheel)) + return "", err - wsc := project.Get(ctx).WorkspacesClient() - wf, err := os.Open(wheel) - if err != nil { - return "", err - } - defer wf.Close() - raw, err := io.ReadAll(wf) - if err != nil { - return "", err - } - // err = dbfs.Create(dbfsLoc, raw, true) - err = utilities.CreateDbfsFile(ctx, - wsc, - dbfsLoc, - raw, - true, - ) - // TODO: maintain PEP503 compliance and update meta-files: - // ${DBFSWheelLocation}/index.html and ${DBFSWheelLocation}/${NormalizedName}/index.html - return dbfsLoc, err + // wsc := project.Get(ctx).WorkspacesClient() + // wf, err := os.Open(wheel) + // if err != nil { + // return "", err + // } + // defer wf.Close() + // raw, err := io.ReadAll(wf) + // if err != nil { + // return "", err + // } + // // err = dbfs.Create(dbfsLoc, raw, true) + // err = utilities.CreateDbfsFile(ctx, + // wsc, + // dbfsLoc, + // raw, + // true, + // ) + // // TODO: maintain PEP503 compliance and update meta-files: + // // ${DBFSWheelLocation}/index.html and ${DBFSWheelLocation}/${NormalizedName}/index.html + // return dbfsLoc, err } func silentlyCleanupWheelFolder(dir string) { @@ -116,11 +113,3 @@ func silentChildWithSuffix(dir, suffix string) string { } return "" } - -func chdirAndBack(dir string) func() { - wd, _ := os.Getwd() - os.Chdir(dir) - return func() { - os.Chdir(wd) - } -} diff --git a/python/wheel_test.go b/lib/flavor/py/wheel_test.go similarity index 97% rename from python/wheel_test.go rename to lib/flavor/py/wheel_test.go index cc74eaf2..cbdb4c6f 100644 --- a/python/wheel_test.go +++ b/lib/flavor/py/wheel_test.go @@ -1,4 +1,4 @@ -package python +package py import ( "context" diff --git a/lib/flavor/script/script.go b/lib/flavor/script/script.go new file mode 100644 index 00000000..baf1d288 --- /dev/null +++ b/lib/flavor/script/script.go @@ -0,0 +1,55 @@ +package script + +import ( + "context" + "fmt" + "os" + + "github.com/databricks/bricks/lib/flavor" + "github.com/databricks/bricks/lib/spawn" + "github.com/databricks/databricks-sdk-go/databricks" +) + +type Script struct { + OnInit string `json:"init,omitempty"` + OnDeploy string `json:"deploy,omitempty"` +} + +// Detected returns true if any if the scripts are configured +func (s *Script) Detected(p flavor.Project) bool { + return s.OnInit != "" || s.OnDeploy != "" +} + +// TODO: move to a separate interface +func (s *Script) LocalArtifacts(ctx context.Context, p flavor.Project) (flavor.Artifacts, error) { + return nil, nil +} + +func (s *Script) Build(ctx context.Context, p flavor.Project, status func(string)) error { + if s.OnDeploy == "" { + return nil + } + cfg := p.WorkspacesClient().Config + err := cfg.EnsureResolved() + if err != nil { + return fmt.Errorf("config: %w", err) + } + for _, a := range databricks.ConfigAttributes { + if len(a.EnvVars) != 1 { + continue + } + v := a.GetString(cfg) + if v == "" { + continue + } + // set environment variables of the current process to propagate + // the authentication credentials + os.Setenv(a.EnvVars[0], v) + } + out, err := spawn.ExecAndPassErr(ctx, "/bin/sh", "-c", s.OnDeploy) + if err != nil { + println(string(out)) + return fmt.Errorf("failed: %s", s.OnDeploy) + } + return nil +} diff --git a/lib/spawn/exec.go b/lib/spawn/exec.go new file mode 100644 index 00000000..accc80a0 --- /dev/null +++ b/lib/spawn/exec.go @@ -0,0 +1,81 @@ +package spawn + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "log" + "os/exec" + "runtime" + "strings" +) + +type cxtKey int + +const ( + prjRoot cxtKey = iota +) + +func WithRoot(ctx context.Context, root string) context.Context { + return context.WithValue(ctx, prjRoot, root) +} + +func ExecAndPassErr(ctx context.Context, name string, args ...string) ([]byte, error) { + log.Printf("[DEBUG] Running %s %s", name, strings.Join(args, " ")) + + reader, writer := io.Pipe() + + out := bytes.NewBuffer([]byte{}) // add option to route to Stdout in verbose mode + go io.Copy(out, reader) + + cmd := exec.CommandContext(ctx, name, args...) + cmd.Stdout = writer + cmd.Stderr = writer + + root, ok := ctx.Value(prjRoot).(string) + if ok { + cmd.Dir = root + } + + err := cmd.Run() + _ = writer.Close() + _ = reader.Close() + + if err != nil { + return nil, fmt.Errorf(trimmedS(out.Bytes())) + } + + return []byte(trimmedS(out.Bytes())), nil +} + +func DetectExecutable(ctx context.Context, exec string) (string, error) { + detector := "which" + if runtime.GOOS == "windows" { + detector = "where.exe" + } + out, err := ExecAndPassErr(ctx, detector, exec) + if err != nil { + return "", err + } + return trimmedS(out), nil +} + +func nicerErr(err error) error { + if err == nil { + return nil + } + if ee, ok := err.(*exec.ExitError); ok { + errMsg := trimmedS(ee.Stderr) + if errMsg == "" { + errMsg = err.Error() + } + return errors.New(errMsg) + } + return err +} + +func trimmedS(bytes []byte) string { + return strings.Trim(string(bytes), "\n\r") +} diff --git a/lib/spawn/exec_test.go b/lib/spawn/exec_test.go new file mode 100644 index 00000000..93fd31b9 --- /dev/null +++ b/lib/spawn/exec_test.go @@ -0,0 +1,14 @@ +package spawn + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + + +func TestExecAndPassError(t *testing.T) { + _, err := ExecAndPassErr(context.Background(), "which", "__non_existing__") + assert.EqualError(t, err, "exit status 1") +} diff --git a/lib/ui/spinner.go b/lib/ui/spinner.go new file mode 100644 index 00000000..ea61beb2 --- /dev/null +++ b/lib/ui/spinner.go @@ -0,0 +1,53 @@ +package ui + +import ( + "context" + "math/rand" + "time" + + "github.com/briandowns/spinner" + "github.com/fatih/color" +) + +// https://github.com/leaanthony/spinner alternative + +func Spinner(label string, fn func(*spinner.Spinner) error, final string) error { + rand.Seed(time.Now().UnixMilli()) + s := spinner.New(spinner.CharSets[rand.Intn(11)], 200*time.Millisecond) + _ = s.Color("green") + s.Start() + s.Prefix = " " + s.Suffix = " " + label + + err := fn(s) + if err == nil { + s.FinalMSG = color.GreenString(" ✓ %s", final) // or ✓ + } else { + s.FinalMSG = color.RedString(" ✗ %s", err) // or + } + s.Stop() + println("") + return err +} + +type Stage struct { + InProgress string + Callback func(context.Context, func(string)) error + Complete string +} + +func SpinStages(ctx context.Context, stages []Stage) error { + for _, v := range stages { + err := Spinner(v.InProgress, func(s *spinner.Spinner) error { + updateMsg := func(msg string) { + // TODO: print to stdout for non-tty + s.Suffix = " " + msg + } + return v.Callback(ctx, updateMsg) + }, v.Complete) + if err != nil { + return err + } + } + return nil +} diff --git a/main.go b/main.go index 805c5bb6..dbce08b7 100644 --- a/main.go +++ b/main.go @@ -2,6 +2,7 @@ package main import ( _ "github.com/databricks/bricks/cmd/api" + _ "github.com/databricks/bricks/cmd/build" _ "github.com/databricks/bricks/cmd/configure" _ "github.com/databricks/bricks/cmd/fs" _ "github.com/databricks/bricks/cmd/init" diff --git a/project/config.go b/project/config.go index f718df31..78cccf9b 100644 --- a/project/config.go +++ b/project/config.go @@ -9,6 +9,10 @@ import ( "reflect" "github.com/databricks/bricks/folders" + "github.com/databricks/bricks/lib/flavor/mvn" + "github.com/databricks/bricks/lib/flavor/notebooks" + "github.com/databricks/bricks/lib/flavor/py" + "github.com/databricks/bricks/lib/flavor/script" "github.com/databricks/databricks-sdk-go/service/clusters" "github.com/ghodss/yaml" @@ -38,6 +42,11 @@ type Config struct { // development-time vs deployment-time resources DevCluster *clusters.ClusterInfo `json:"dev_cluster,omitempty"` + Maven *mvn.Maven `json:"mvn,omitempty"` + SetupPy *py.SetupDotPy `json:"python,omitempty"` + Script *script.Script `json:"script,omitempty"` + Notebooks *notebooks.Notebooks `json:"notebooks,omitempty"` + // Assertions defines a list of configurations expected to be applied // to the workspace by a higher-privileged user (or service principal) // in order for the deploy command to work, as individual project teams diff --git a/project/flavor.go b/project/flavor.go deleted file mode 100644 index 4c859926..00000000 --- a/project/flavor.go +++ /dev/null @@ -1,81 +0,0 @@ -package project - -// type Flavor interface { -// // Name returns a tuple of flavor key and readable name -// Name() (string, string) - -// // Detected returns true on successful metadata checks -// Detected() bool - -// // Build triggers packaging subprocesses -// Build(context.Context) error -// // TODO: Init() Questions -// // TODO: Deploy(context.Context) error -// } - -// var _ Flavor = PythonWheel{} - -// type PythonWheel struct{} - -// func (pw PythonWheel) Name() (string, string) { -// return "wheel", "Python Wheel" -// } - -// func (pw PythonWheel) Detected() bool { -// root, err := findProjectRoot() -// if err != nil { -// return false -// } -// _, err = os.Stat(fmt.Sprintf("%s/setup.py", root)) -// return err == nil -// } - -// func (pw PythonWheel) Build(ctx context.Context) error { -// defer toTheRootAndBack()() -// // do subprocesses or https://github.com/go-python/cpy3 -// // it all depends on complexity and binary size -// // TODO: detect if there's an .venv here and call setup.py with ENV vars of it -// // TODO: where.exe python (WIN) / which python (UNIX) -// cmd := exec.CommandContext(ctx, "python", "setup.py", "bdist-wheel") -// err := cmd.Run() -// if err != nil { -// return err -// } -// return nil -// } - -// func toTheRootAndBack() func() { -// wd, _ := os.Getwd() -// root, _ := findProjectRoot() -// os.Chdir(root) -// return func() { -// os.Chdir(wd) -// } -// } - -// var _ Flavor = PythonNotebooks{} - -// type PythonNotebooks struct{} - -// func (n PythonNotebooks) Name() (string, string) { -// // or just "notebooks", as we might shuffle in scala?... -// return "python-notebooks", "Python Notebooks" -// } - -// func (n PythonNotebooks) Detected() bool { -// // TODO: Steps: -// // - get all filenames -// // - read first X bytes from random 10 files and check -// // if they're "Databricks Notebook Source" -// return false -// } - -// func (n PythonNotebooks) Build(ctx context.Context) error { -// // TODO: perhaps some linting?.. -// return nil -// } - -// func (n PythonNotebooks) Deploy(ctx context.Context) error { -// // TODO: recursively upload notebooks to a given workspace path -// return nil -// } diff --git a/project/project.go b/project/project.go index d2502774..16d1e355 100644 --- a/project/project.go +++ b/project/project.go @@ -3,8 +3,15 @@ package project import ( "context" "fmt" + "os" "sync" + "github.com/databricks/bricks/lib/flavor" + "github.com/databricks/bricks/lib/flavor/mvn" + "github.com/databricks/bricks/lib/flavor/notebooks" + "github.com/databricks/bricks/lib/flavor/py" + "github.com/databricks/bricks/lib/flavor/script" + "github.com/databricks/bricks/lib/spawn" "github.com/databricks/databricks-sdk-go/databricks" "github.com/databricks/databricks-sdk-go/service/clusters" "github.com/databricks/databricks-sdk-go/service/commands" @@ -23,6 +30,9 @@ type project struct { environment *Environment wsc *workspaces.WorkspacesClient me *scim.User + + artifacts flavor.Artifacts + flavors []flavor.Flavor } // Configure is used as a PreRunE function for all commands that @@ -60,16 +70,37 @@ func Initialize(ctx context.Context, root, env string) (context.Context, error) if !ok { return nil, fmt.Errorf("environment [%s] not defined", env) } - + // TODO: maybe we do Flavor#Detect here + if config.Maven == nil { + config.Maven = &mvn.Maven{} + } + if config.SetupPy == nil { + config.SetupPy = &py.SetupDotPy{} + } + if config.Script == nil { + config.Script = &script.Script{} + } + if config.Notebooks == nil { + config.Notebooks = ¬ebooks.Notebooks{} + } p := project{ root: root, env: env, config: &config, environment: &environment, + flavors: []flavor.Flavor{ + config.Maven, + config.SetupPy, + config.Script, + config.Notebooks, + }, } p.initializeWorkspacesClient(ctx) + // make all spawn.ExecAndPassErr executed in project root + // TODO: pass verbosity flag from somewhere + ctx = spawn.WithRoot(ctx, root) return context.WithValue(ctx, &projectKey, &p), nil } @@ -131,6 +162,7 @@ func (p *project) DeploymentIsolationPrefix() string { if p.config.Isolation == None { return p.config.Name } + // TODO: git branch if p.config.Isolation == Soft { me, err := p.Me() if err != nil { @@ -200,11 +232,16 @@ func RunPythonOnDev(ctx context.Context, command string) common.CommandResults { } */ +const bricksClusterId = "BRICKS_CLUSTER_ID" + // TODO: Add safe access to p.project and p.project.DevCluster that throws errors if // the fields are not defined properly func (p *project) GetDevelopmentClusterId(ctx context.Context) (clusterId string, err error) { clusterId = p.config.DevCluster.ClusterId clusterName := p.config.DevCluster.ClusterName + if clusterId == "" { + clusterId = os.Getenv(bricksClusterId) + } if clusterId != "" { return } else if clusterName != "" { diff --git a/project/project_build.go b/project/project_build.go new file mode 100644 index 00000000..2e47919e --- /dev/null +++ b/project/project_build.go @@ -0,0 +1,146 @@ +package project + +import ( + "context" + "encoding/base64" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/databricks/bricks/lib/flavor" + "github.com/databricks/databricks-sdk-go/databricks/apierr" + "github.com/databricks/databricks-sdk-go/service/workspace" +) + +var b64 = base64.StdEncoding + +func (p *project) LocalArtifacts(ctx context.Context) (flavor.Artifacts, error) { + for _, f := range p.flavors { + if !f.Detected(p) { + continue + } + arts, err := f.LocalArtifacts(ctx, p) + if err != nil { + return nil, err + } + p.artifacts = append(p.artifacts, arts...) + } + return p.artifacts, nil +} + +type preparable interface { + Prepare(ctx context.Context, prj flavor.Project, status func(string)) error +} + +func (p *project) Prepare(ctx context.Context, status func(string)) error { + for _, f := range p.flavors { + if !f.Detected(p) { + continue + } + prep, ok := f.(preparable) + if !ok { + continue + } + err := prep.Prepare(ctx, p, status) + if err != nil { + return fmt.Errorf("prepare: %w", err) + } + } + return nil +} + +type buildable interface { + Build(ctx context.Context, prj flavor.Project, status func(string)) error +} + +func (p *project) Build(ctx context.Context, status func(string)) error { + for _, f := range p.flavors { + if !f.Detected(p) { + continue + } + b, ok := f.(buildable) + if !ok { + continue + } + // TODO: compare last modified artifact vs last modified remote + // TODO: compare last modified artifact vs last modified fileset + err := b.Build(ctx, p, status) + if err != nil { + return fmt.Errorf("build: %w", err) + } + } + return nil +} + +func (p *project) Upload(ctx context.Context, status func(string)) error { + for _, a := range p.artifacts { + err := p.upload(ctx, a, status) + if err != nil { + return fmt.Errorf("upload: %w", err) + } + } + return nil +} + +func (p *project) upload(ctx context.Context, a flavor.Artifact, status func(string)) error { + kind, local, remote := p.remotePath(a) + if remote == "" { + return nil + } + localBasename := filepath.Base(local) + remoteDir := filepath.Dir(remote) + status(fmt.Sprintf("Uploading %s to %s", localBasename, remoteDir)) + file, err := os.Open(local) + if err != nil { + return fmt.Errorf("open: %w", err) + } + defer file.Close() + if kind != flavor.LocalNotebook { + return p.wsc.Dbfs.Overwrite(ctx, remote, file) + } + format, ok := a.NotebookInfo() + if !ok { + return fmt.Errorf("unknown notebook: %s", a) + } + raw, err := io.ReadAll(file) + if err != nil { + return fmt.Errorf("read: %w", err) + } + _, err = p.wsc.Workspace.GetStatusByPath(ctx, remoteDir) + if apierr.IsMissing(err) { + err = p.wsc.Workspace.MkdirsByPath(ctx, remoteDir) + if err != nil { + return err + } + } + return p.wsc.Workspace.Import(ctx, workspace.Import{ + Path: remote, + Overwrite: format.Overwrite, + Format: format.Format, + Language: format.Language, + Content: b64.EncodeToString(raw), + }) +} + +func (p *project) remotePath(a flavor.Artifact) (flavor.Kind, string, string) { + libsFmt := fmt.Sprintf("dbfs:/FileStore/%%s/%s/%%s", p.DeploymentIsolationPrefix()) + kind, loc := a.KindAndLocation() + switch kind { + case flavor.LocalJar: + return kind, loc, fmt.Sprintf(libsFmt, "jars", filepath.Base(loc)) + case flavor.LocalWheel: + return kind, loc, fmt.Sprintf(libsFmt, "wheels", filepath.Base(loc)) + case flavor.LocalEgg: + return kind, loc, fmt.Sprintf(libsFmt, "eggs", filepath.Base(loc)) + case flavor.LocalNotebook: + me, err := p.Me() + if err != nil { + panic(err) + } + return kind, loc, fmt.Sprintf("/Users/%s/%s/%s", + me.UserName, p.config.Name, a.Notebook.RemoteRelative) + default: + return kind, loc, "" + } +} diff --git a/project/project_install.go b/project/project_install.go new file mode 100644 index 00000000..f379de09 --- /dev/null +++ b/project/project_install.go @@ -0,0 +1,134 @@ +package project + +import ( + "context" + "fmt" + "path/filepath" + "sort" + "strings" + + "github.com/databricks/bricks/lib/flavor" + "github.com/databricks/databricks-sdk-go/retries" + "github.com/databricks/databricks-sdk-go/service/clusters" + "github.com/databricks/databricks-sdk-go/service/libraries" +) + +type runsOnCluster interface { + RequiresCluster() bool +} + +type restartable interface { + RequiresRestart() bool +} + +func (p *project) RequiresCluster() bool { + for _, f := range p.flavors { + if !f.Detected(p) { + continue + } + r, ok := f.(runsOnCluster) + if !ok { + continue + } + if r.RequiresCluster() { + return true + } + } + return false +} + +func (p *project) RequiresRestart() bool { + for _, f := range p.flavors { + if !f.Detected(p) { + continue + } + r, ok := f.(restartable) + if !ok { + continue + } + if r.RequiresRestart() { + return true + } + } + return false +} + +func (p *project) Install(ctx context.Context, status func(string)) error { + if !p.RequiresCluster() { + // nothing to do + return nil + } + clusterId, err := p.GetDevelopmentClusterId(ctx) + if err != nil { + // cluster not found is also fine, abort execution + return nil + } + info, err := p.wsc.Clusters.GetByClusterId(ctx, clusterId) + if err != nil { + // TODO: special behavior for (auto)deleted clusters + // re-create, if possible? + return err + } + if p.RequiresRestart() && info.IsRunningOrResizing() { + _, err = p.wsc.Clusters.RestartAndWait(ctx, clusters.RestartCluster{ + ClusterId: clusterId, + }, func(i *retries.Info[clusters.ClusterInfo]) { + status(i.Info.StateMessage) + }) + } else if !info.IsRunningOrResizing() { + _, err = p.wsc.Clusters.StartByClusterIdAndWait(ctx, clusterId, + func(i *retries.Info[clusters.ClusterInfo]) { + status(i.Info.StateMessage) + }) + } + if err != nil { + return err + } + if !p.artifacts.HasLibraries() { + return nil + } + var libs []libraries.Library + for _, a := range p.artifacts { + k, _, remote := p.remotePath(a) + switch k { + case flavor.LocalJar: + libs = append(libs, libraries.Library{Jar: remote}) + case flavor.LocalWheel: + libs = append(libs, libraries.Library{Whl: remote}) + case flavor.LocalEgg: + libs = append(libs, libraries.Library{Egg: remote}) + case flavor.RegistryLibrary: + libs = append(libs, a.Library) + default: + continue + } + } + // TODO: uninstall previous versions of libraries + return p.wsc.Libraries.UpdateAndWait(ctx, libraries.Update{ + ClusterId: clusterId, + Install: libs, + }, func(i *retries.Info[libraries.ClusterLibraryStatuses]) { + byStatus := map[string][]string{} + for _, lib := range i.Info.LibraryStatuses { + if lib.IsLibraryForAllClusters { + continue + } + if lib.Status == libraries.LibraryFullStatusStatusInstalled || + lib.Status == libraries.LibraryFullStatusStatusUninstallOnRestart { + continue + } + name := lib.Library.String() + if strings.HasPrefix(name, "jar:dbfs:") || strings.HasPrefix(name, "whl:dbfs:") { + name = filepath.Base(name) + } + byStatus[string(lib.Status)] = append(byStatus[string(lib.Status)], name) + } + msg := []string{} + for k, v := range byStatus { + sort.Strings(v) + msg = append(msg, fmt.Sprintf("%s (%s)", k, strings.Join(v, ", "))) + } + sort.Strings(msg) + status(strings.Join(msg, ", ")) + }) +} diff --git a/python/env_test.go b/python/env_test.go deleted file mode 100644 index 614d1832..00000000 --- a/python/env_test.go +++ /dev/null @@ -1,24 +0,0 @@ -package python - -import ( - "context" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestFreeze(t *testing.T) { - env, err := Freeze(context.Background()) - assert.NoError(t, err) - assert.Greater(t, len(env), 1) - assert.True(t, env.Has("urllib3")) -} - -func TestPyInlineX(t *testing.T) { - defer chdirAndBack("testdata/simple-python-wheel")() - dist, err := ReadDistribution(context.Background()) - assert.NoError(t, err) - assert.Equal(t, "dummy", dist.Name) - assert.Equal(t, "dummy", dist.Packages[0]) - assert.True(t, dist.InstallEnvironment().Has("requests")) -} diff --git a/python/runner.go b/python/runner.go index 6145da27..f03a44d8 100644 --- a/python/runner.go +++ b/python/runner.go @@ -5,21 +5,23 @@ import ( "errors" "fmt" "os" - "os/exec" - "runtime" "strings" + + "github.com/databricks/bricks/lib/spawn" + "github.com/databricks/databricks-sdk-go/service/commands" ) func PyInline(ctx context.Context, inlinePy string) (string, error) { - return Py(ctx, "-c", TrimLeadingWhitespace(inlinePy)) + return Py(ctx, "-c", commands.TrimLeadingWhitespace(inlinePy)) } -func Py(ctx context.Context, script string, args ...string) (string, error) { +// Py calls system-detected Python3 executable +func Py(ctx context.Context, args ...string) (string, error) { py, err := detectExecutable(ctx) if err != nil { return "", err } - out, err := execAndPassErr(ctx, py, append([]string{script}, args...)...) + out, err := spawn.ExecAndPassErr(ctx, py, args...) if err != nil { // current error message chain is longer: // failed to call {pyExec} __non_existing__.py: {pyExec}: can't open @@ -28,7 +30,7 @@ func Py(ctx context.Context, script string, args ...string) (string, error) { // can't open file '$PWD/__non_existing__.py': [Errno 2] No such file or directory return "", err } - return trimmedS(out), nil + return strings.Trim(string(out), "\n\r"), nil } func createVirtualEnv(ctx context.Context) error { @@ -38,11 +40,7 @@ func createVirtualEnv(ctx context.Context) error { // python3 -m build -w // https://packaging.python.org/en/latest/tutorials/packaging-projects/ -func detectVirtualEnv() (string, error) { - wd, err := os.Getwd() - if err != nil { - return "", err - } +func detectVirtualEnv(wd string) (string, error) { wdf, err := os.Open(wd) if err != nil { return "", err @@ -74,69 +72,10 @@ func detectExecutable(ctx context.Context) (string, error) { if pyExec != "" { return pyExec, nil } - detector := "which" - if runtime.GOOS == "windows" { - detector = "where.exe" - } - out, err := execAndPassErr(ctx, detector, "python3") + detected, err := spawn.DetectExecutable(ctx, "python3") if err != nil { return "", err } - pyExec = trimmedS(out) + pyExec = detected return pyExec, nil } - -func execAndPassErr(ctx context.Context, name string, args ...string) ([]byte, error) { - // TODO: move out to a separate package, once we have Maven integration - out, err := exec.CommandContext(ctx, name, args...).Output() - return out, nicerErr(err) -} - -func nicerErr(err error) error { - if err == nil { - return nil - } - if ee, ok := err.(*exec.ExitError); ok { - errMsg := trimmedS(ee.Stderr) - if errMsg == "" { - errMsg = err.Error() - } - return errors.New(errMsg) - } - return err -} - -func trimmedS(bytes []byte) string { - return strings.Trim(string(bytes), "\n\r") -} - -// TrimLeadingWhitespace removes leading whitespace -// function copied from Databricks Terraform provider -func TrimLeadingWhitespace(commandStr string) (newCommand string) { - lines := strings.Split(strings.ReplaceAll(commandStr, "\t", " "), "\n") - leadingWhitespace := 1<<31 - 1 - for _, line := range lines { - for pos, char := range line { - if char == ' ' || char == '\t' { - continue - } - // first non-whitespace character - if pos < leadingWhitespace { - leadingWhitespace = pos - } - // is not needed further - break - } - } - for i := 0; i < len(lines); i++ { - if lines[i] == "" || strings.Trim(lines[i], " \t") == "" { - continue - } - if len(lines[i]) < leadingWhitespace { - newCommand += lines[i] + "\n" // or not.. - } else { - newCommand += lines[i][leadingWhitespace:] + "\n" - } - } - return -} diff --git a/python/runner_test.go b/python/runner_test.go index 6eea2abd..f06a2661 100644 --- a/python/runner_test.go +++ b/python/runner_test.go @@ -6,14 +6,10 @@ import ( "os" "testing" + "github.com/databricks/bricks/lib/spawn" "github.com/stretchr/testify/assert" ) -func TestExecAndPassError(t *testing.T) { - _, err := execAndPassErr(context.Background(), "which", "__non_existing__") - assert.EqualError(t, err, "exit status 1") -} - func TestDetectPython(t *testing.T) { pyExec = "" py, err := detectExecutable(context.Background()) @@ -30,37 +26,25 @@ func TestDetectPythonCache(t *testing.T) { } func TestDetectVirtualEnvFalse(t *testing.T) { - venvDir, err := detectVirtualEnv() + wd, err := os.Getwd() + assert.NoError(t, err) + venvDir, err := detectVirtualEnv(wd) assert.NoError(t, err) assert.Equal(t, "", venvDir) } func TestMakeDetectableVenv(t *testing.T) { - var temp string - defer testTempdir(t, &temp)() + temp := t.TempDir() + ctx := spawn.WithRoot(context.Background(), temp) - // TODO: rewrite with t.TempDir() and arguments - err := createVirtualEnv(context.Background()) + err := createVirtualEnv(ctx) assert.NoError(t, err) - venv, err := detectVirtualEnv() + venv, err := detectVirtualEnv(temp) assert.NoError(t, err) assert.Equal(t, fmt.Sprintf("%s/.venv", temp), venv) } -func testTempdir(t *testing.T, dir *string) func() { - wd, _ := os.Getwd() - temp, err := os.MkdirTemp(os.TempDir(), "brickstest") - assert.NoError(t, err) - os.Chdir(temp) - wd2, _ := os.Getwd() - *dir = wd2 - return func() { - os.Chdir(wd) - os.RemoveAll(temp) - } -} - func TestPyError(t *testing.T) { _, err := Py(context.Background(), "__non_existing__.py") assert.Contains(t, err.Error(), "can't open file") diff --git a/utilities/dbfs.go b/utilities/dbfs.go index 985fac5a..f8b5268b 100644 --- a/utilities/dbfs.go +++ b/utilities/dbfs.go @@ -13,6 +13,8 @@ import ( // move to go sdk / replace with utility function once // https://github.com/databricks/databricks-sdk-go/issues/57 is Done // Tracked in https://github.com/databricks/bricks/issues/25 +// +// Deprecated: use w.Dbfs.Overwrite func CreateDbfsFile(ctx context.Context, wsc *workspaces.WorkspacesClient, path string, @@ -59,6 +61,7 @@ func CreateDbfsFile(ctx context.Context, return nil } +// Deprecated: use w.Dbfs.Open func ReadDbfsFile(ctx context.Context, wsc *workspaces.WorkspacesClient, path string,