From 9a88fa602d47f96953d7217530ee498a2f21b7a3 Mon Sep 17 00:00:00 2001 From: Andrew Nester Date: Tue, 25 Jul 2023 13:35:08 +0200 Subject: [PATCH] Added support for artifacts building for bundles (#583) ## Changes Added support for artifacts building for bundles. Now it allows to specify `artifacts` block in bundle.yml and define a resource (at the moment Python wheel) to be build and uploaded during `bundle deploy` Built artifact will be automatically attached to corresponding job task or pipeline where it's used as a library Follow-ups: 1. If artifact is used in job or pipeline, but not found in the config, try to infer and build it anyway 2. If build command is not provided for Python wheel artifact, infer it --- bundle/artifacts/artifacts.go | 167 ++++++++++++++++++ bundle/artifacts/build.go | 22 ++- bundle/artifacts/notebook/build.go | 81 --------- bundle/artifacts/notebook/marker.go | 29 --- bundle/artifacts/notebook/upload.go | 60 ------- bundle/artifacts/upload.go | 35 +++- bundle/artifacts/whl/build.go | 66 +++++++ bundle/config/artifact.go | 76 ++++++-- bundle/libraries/libraries.go | 107 +++++++++++ bundle/phases/deploy.go | 3 + bundle/tests/bundle/python_wheel/.gitignore | 3 + bundle/tests/bundle/python_wheel/bundle.yml | 19 ++ .../bundle/python_wheel/my_test_code/setup.py | 15 ++ .../python_wheel/my_test_code/src/__init__.py | 2 + .../python_wheel/my_test_code/src/__main__.py | 16 ++ bundle/tests/bundle/wheel_test.go | 26 +++ python/utils.go | 48 +++++ python/wheel.go | 46 +---- 18 files changed, 596 insertions(+), 225 deletions(-) create mode 100644 bundle/artifacts/artifacts.go delete mode 100644 bundle/artifacts/notebook/build.go delete mode 100644 bundle/artifacts/notebook/marker.go delete mode 100644 bundle/artifacts/notebook/upload.go create mode 100644 bundle/artifacts/whl/build.go create mode 100644 bundle/libraries/libraries.go create mode 100644 bundle/tests/bundle/python_wheel/.gitignore create mode 100644 bundle/tests/bundle/python_wheel/bundle.yml create mode 100644 bundle/tests/bundle/python_wheel/my_test_code/setup.py create mode 100644 bundle/tests/bundle/python_wheel/my_test_code/src/__init__.py create mode 100644 bundle/tests/bundle/python_wheel/my_test_code/src/__main__.py create mode 100644 bundle/tests/bundle/wheel_test.go create mode 100644 python/utils.go diff --git a/bundle/artifacts/artifacts.go b/bundle/artifacts/artifacts.go new file mode 100644 index 00000000..c5413121 --- /dev/null +++ b/bundle/artifacts/artifacts.go @@ -0,0 +1,167 @@ +package artifacts + +import ( + "context" + "crypto/sha256" + "encoding/base64" + "errors" + "fmt" + "os" + "path" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/artifacts/whl" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go/service/workspace" +) + +type mutatorFactory = func(name string) bundle.Mutator + +var buildMutators map[config.ArtifactType]mutatorFactory = map[config.ArtifactType]mutatorFactory{ + config.ArtifactPythonWheel: whl.Build, +} + +var uploadMutators map[config.ArtifactType]mutatorFactory = map[config.ArtifactType]mutatorFactory{} + +func getBuildMutator(t config.ArtifactType, name string) bundle.Mutator { + mutatorFactory, ok := buildMutators[t] + if !ok { + mutatorFactory = BasicBuild + } + + return mutatorFactory(name) +} + +func getUploadMutator(t config.ArtifactType, name string) bundle.Mutator { + mutatorFactory, ok := uploadMutators[t] + if !ok { + mutatorFactory = BasicUpload + } + + return mutatorFactory(name) +} + +// Basic Build defines a general build mutator which builds artifact based on artifact.BuildCommand +type basicBuild struct { + name string +} + +func BasicBuild(name string) bundle.Mutator { + return &basicBuild{name: name} +} + +func (m *basicBuild) Name() string { + return fmt.Sprintf("artifacts.Build(%s)", m.name) +} + +func (m *basicBuild) Apply(ctx context.Context, b *bundle.Bundle) error { + artifact, ok := b.Config.Artifacts[m.name] + if !ok { + return fmt.Errorf("artifact doesn't exist: %s", m.name) + } + + cmdio.LogString(ctx, fmt.Sprintf("artifacts.Build(%s): Building...", m.name)) + + out, err := artifact.Build(ctx) + if err != nil { + return fmt.Errorf("artifacts.Build(%s): %w, output: %s", m.name, err, out) + } + cmdio.LogString(ctx, fmt.Sprintf("artifacts.Build(%s): Build succeeded", m.name)) + + return nil +} + +// Basic Upload defines a general upload mutator which uploads artifact as a library to workspace +type basicUpload struct { + name string +} + +func BasicUpload(name string) bundle.Mutator { + return &basicUpload{name: name} +} + +func (m *basicUpload) Name() string { + return fmt.Sprintf("artifacts.Build(%s)", m.name) +} + +func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error { + artifact, ok := b.Config.Artifacts[m.name] + if !ok { + return fmt.Errorf("artifact doesn't exist: %s", m.name) + } + + if len(artifact.Files) == 0 { + return fmt.Errorf("artifact source is not configured: %s", m.name) + } + + err := uploadArtifact(ctx, artifact, b) + if err != nil { + return fmt.Errorf("artifacts.Upload(%s): %w", m.name, err) + } + + return nil +} + +func uploadArtifact(ctx context.Context, a *config.Artifact, b *bundle.Bundle) error { + for i := range a.Files { + f := &a.Files[i] + if f.NeedsUpload() { + filename := path.Base(f.Source) + cmdio.LogString(ctx, fmt.Sprintf("artifacts.Upload(%s): Uploading...", filename)) + remotePath, err := uploadArtifactFile(ctx, f.Source, b) + if err != nil { + return err + } + cmdio.LogString(ctx, fmt.Sprintf("artifacts.Upload(%s): Upload succeeded", filename)) + + f.RemotePath = remotePath + } + } + + a.NormalisePaths() + return nil +} + +// Function to upload artifact file to Workspace +func uploadArtifactFile(ctx context.Context, file string, b *bundle.Bundle) (string, error) { + raw, err := os.ReadFile(file) + if err != nil { + return "", fmt.Errorf("unable to read %s: %w", file, errors.Unwrap(err)) + } + + uploadPath, err := getUploadBasePath(b) + if err != nil { + return "", err + } + + fileHash := sha256.Sum256(raw) + remotePath := path.Join(uploadPath, fmt.Sprintf("%x", fileHash), path.Base(file)) + // Make sure target directory exists. + err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, path.Dir(remotePath)) + if err != nil { + return "", fmt.Errorf("unable to create directory for %s: %w", remotePath, err) + } + + // Import to workspace. + err = b.WorkspaceClient().Workspace.Import(ctx, workspace.Import{ + Path: remotePath, + Overwrite: true, + Format: workspace.ImportFormatAuto, + Content: base64.StdEncoding.EncodeToString(raw), + }) + if err != nil { + return "", fmt.Errorf("unable to import %s: %w", remotePath, err) + } + + return remotePath, nil +} + +func getUploadBasePath(b *bundle.Bundle) (string, error) { + artifactPath := b.Config.Workspace.ArtifactsPath + if artifactPath == "" { + return "", fmt.Errorf("remote artifact path not configured") + } + + return path.Join(artifactPath, ".internal"), nil +} diff --git a/bundle/artifacts/build.go b/bundle/artifacts/build.go index 294351f4..7721635a 100644 --- a/bundle/artifacts/build.go +++ b/bundle/artifacts/build.go @@ -3,9 +3,9 @@ package artifacts import ( "context" "fmt" + "path/filepath" "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/artifacts/notebook" ) func BuildAll() bundle.Mutator { @@ -33,9 +33,23 @@ func (m *build) Apply(ctx context.Context, b *bundle.Bundle) error { return fmt.Errorf("artifact doesn't exist: %s", m.name) } - if artifact.Notebook != nil { - return bundle.Apply(ctx, b, notebook.Build(m.name)) + if len(artifact.Files) == 0 && artifact.BuildCommand == "" { + return fmt.Errorf("artifact %s misconfigured: 'files' or 'build' property is required", m.name) } - return nil + // If artifact file is explicitly defined, skip building the artifact + if len(artifact.Files) != 0 { + return nil + } + + // If artifact path is not provided, use bundle root dir + if artifact.Path == "" { + artifact.Path = b.Config.Path + } + + if !filepath.IsAbs(artifact.Path) { + artifact.Path = filepath.Join(b.Config.Path, artifact.Path) + } + + return bundle.Apply(ctx, b, getBuildMutator(artifact.Type, m.name)) } diff --git a/bundle/artifacts/notebook/build.go b/bundle/artifacts/notebook/build.go deleted file mode 100644 index 4a25868a..00000000 --- a/bundle/artifacts/notebook/build.go +++ /dev/null @@ -1,81 +0,0 @@ -package notebook - -import ( - "context" - "errors" - "fmt" - "os" - "path" - "path/filepath" - "strings" - - "github.com/databricks/cli/bundle" - "github.com/databricks/databricks-sdk-go/service/workspace" -) - -type build struct { - name string -} - -func Build(name string) bundle.Mutator { - return &build{ - name: name, - } -} - -func (m *build) Name() string { - return fmt.Sprintf("notebook.Build(%s)", m.name) -} - -func (m *build) Apply(_ context.Context, b *bundle.Bundle) error { - a, ok := b.Config.Artifacts[m.name] - if !ok { - return fmt.Errorf("artifact doesn't exist: %s", m.name) - } - - artifact := a.Notebook - - // Check if the filetype is supported. - switch ext := strings.ToLower(filepath.Ext(artifact.Path)); ext { - case ".py": - artifact.Language = workspace.LanguagePython - case ".scala": - artifact.Language = workspace.LanguageScala - case ".sql": - artifact.Language = workspace.LanguageSql - default: - return fmt.Errorf("invalid notebook extension: %s", ext) - } - - // Open underlying file. - f, err := os.Open(filepath.Join(b.Config.Path, artifact.Path)) - if err != nil { - return fmt.Errorf("unable to open artifact file %s: %w", artifact.Path, errors.Unwrap(err)) - } - defer f.Close() - - // Check that the file contains the notebook marker on its first line. - ok, err = hasMarker(artifact.Language, f) - if err != nil { - return fmt.Errorf("unable to read artifact file %s: %s", artifact.Path, errors.Unwrap(err)) - } - if !ok { - return fmt.Errorf("notebook marker not found in %s", artifact.Path) - } - - // Check that an artifact path is defined. - remotePath := b.Config.Workspace.ArtifactsPath - if remotePath == "" { - return fmt.Errorf("remote artifact path not configured") - } - - // Store absolute paths. - artifact.LocalPath = filepath.Join(b.Config.Path, artifact.Path) - artifact.RemotePath = path.Join(remotePath, stripExtension(artifact.Path)) - return nil -} - -func stripExtension(path string) string { - ext := filepath.Ext(path) - return path[0 : len(path)-len(ext)] -} diff --git a/bundle/artifacts/notebook/marker.go b/bundle/artifacts/notebook/marker.go deleted file mode 100644 index a04ca989..00000000 --- a/bundle/artifacts/notebook/marker.go +++ /dev/null @@ -1,29 +0,0 @@ -package notebook - -import ( - "bufio" - "io" - "strings" - - "github.com/databricks/databricks-sdk-go/service/workspace" -) - -func hasMarker(l workspace.Language, r io.Reader) (bool, error) { - scanner := bufio.NewScanner(r) - ok := scanner.Scan() - if !ok { - return false, scanner.Err() - } - - line := strings.TrimSpace(scanner.Text()) - switch l { - case workspace.LanguagePython: - return line == "# Databricks notebook source", nil - case workspace.LanguageScala: - return line == "// Databricks notebook source", nil - case workspace.LanguageSql: - return line == "-- Databricks notebook source", nil - default: - panic("language not handled: " + l) - } -} diff --git a/bundle/artifacts/notebook/upload.go b/bundle/artifacts/notebook/upload.go deleted file mode 100644 index 38ac9d61..00000000 --- a/bundle/artifacts/notebook/upload.go +++ /dev/null @@ -1,60 +0,0 @@ -package notebook - -import ( - "context" - "encoding/base64" - "errors" - "fmt" - "os" - "path" - - "github.com/databricks/cli/bundle" - "github.com/databricks/databricks-sdk-go/service/workspace" -) - -type upload struct { - name string -} - -func Upload(name string) bundle.Mutator { - return &upload{ - name: name, - } -} - -func (m *upload) Name() string { - return fmt.Sprintf("notebook.Upload(%s)", m.name) -} - -func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) error { - a, ok := b.Config.Artifacts[m.name] - if !ok { - return fmt.Errorf("artifact doesn't exist: %s", m.name) - } - - artifact := a.Notebook - raw, err := os.ReadFile(artifact.LocalPath) - if err != nil { - return fmt.Errorf("unable to read %s: %w", m.name, errors.Unwrap(err)) - } - - // Make sure target directory exists. - err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, path.Dir(artifact.RemotePath)) - if err != nil { - return fmt.Errorf("unable to create directory for %s: %w", m.name, err) - } - - // Import to workspace. - err = b.WorkspaceClient().Workspace.Import(ctx, workspace.Import{ - Path: artifact.RemotePath, - Overwrite: true, - Format: workspace.ImportFormatSource, - Language: artifact.Language, - Content: base64.StdEncoding.EncodeToString(raw), - }) - if err != nil { - return fmt.Errorf("unable to import %s: %w", m.name, err) - } - - return nil -} diff --git a/bundle/artifacts/upload.go b/bundle/artifacts/upload.go index f5ce2b23..990718aa 100644 --- a/bundle/artifacts/upload.go +++ b/bundle/artifacts/upload.go @@ -5,7 +5,7 @@ import ( "fmt" "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/artifacts/notebook" + "github.com/databricks/databricks-sdk-go/service/workspace" ) func UploadAll() bundle.Mutator { @@ -15,6 +15,10 @@ func UploadAll() bundle.Mutator { } } +func CleanUp() bundle.Mutator { + return &cleanUp{} +} + type upload struct { name string } @@ -33,8 +37,33 @@ func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) error { return fmt.Errorf("artifact doesn't exist: %s", m.name) } - if artifact.Notebook != nil { - return bundle.Apply(ctx, b, notebook.Upload(m.name)) + if len(artifact.Files) == 0 { + return fmt.Errorf("artifact source is not configured: %s", m.name) + } + + return bundle.Apply(ctx, b, getUploadMutator(artifact.Type, m.name)) +} + +type cleanUp struct{} + +func (m *cleanUp) Name() string { + return "artifacts.CleanUp" +} + +func (m *cleanUp) Apply(ctx context.Context, b *bundle.Bundle) error { + uploadPath, err := getUploadBasePath(b) + if err != nil { + return err + } + + b.WorkspaceClient().Workspace.Delete(ctx, workspace.Delete{ + Path: uploadPath, + Recursive: true, + }) + + err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, uploadPath) + if err != nil { + return fmt.Errorf("unable to create directory for %s: %w", uploadPath, err) } return nil diff --git a/bundle/artifacts/whl/build.go b/bundle/artifacts/whl/build.go new file mode 100644 index 00000000..4ee47153 --- /dev/null +++ b/bundle/artifacts/whl/build.go @@ -0,0 +1,66 @@ +package whl + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/python" +) + +type build struct { + name string +} + +func Build(name string) bundle.Mutator { + return &build{ + name: name, + } +} + +func (m *build) Name() string { + return fmt.Sprintf("artifacts.whl.Build(%s)", m.name) +} + +func (m *build) Apply(ctx context.Context, b *bundle.Bundle) error { + artifact, ok := b.Config.Artifacts[m.name] + if !ok { + return fmt.Errorf("artifact doesn't exist: %s", m.name) + } + + // TODO: If not set, BuildCommand should be infer prior to this + // via a mutator so that it can be observable. + if artifact.BuildCommand == "" { + return fmt.Errorf("artifacts.whl.Build(%s): missing build property for the artifact", m.name) + } + + cmdio.LogString(ctx, fmt.Sprintf("artifacts.whl.Build(%s): Building...", m.name)) + + dir := artifact.Path + + distPath := filepath.Join(dir, "dist") + os.RemoveAll(distPath) + python.CleanupWheelFolder(dir) + + out, err := artifact.Build(ctx) + if err != nil { + return fmt.Errorf("artifacts.whl.Build(%s): Failed %w, output: %s", m.name, err, out) + } + cmdio.LogString(ctx, fmt.Sprintf("artifacts.whl.Build(%s): Build succeeded", m.name)) + + wheels := python.FindFilesWithSuffixInPath(distPath, ".whl") + if len(wheels) == 0 { + return fmt.Errorf("artifacts.whl.Build(%s): cannot find built wheel in %s", m.name, dir) + } + for _, wheel := range wheels { + artifact.Files = append(artifact.Files, config.ArtifactFile{ + Source: wheel, + }) + } + + return nil +} diff --git a/bundle/config/artifact.go b/bundle/config/artifact.go index f782fcfc..1ac371e9 100644 --- a/bundle/config/artifact.go +++ b/bundle/config/artifact.go @@ -1,20 +1,76 @@ package config -import "github.com/databricks/databricks-sdk-go/service/workspace" +import ( + "context" + "fmt" + "os/exec" + "path" + "strings" + + "github.com/databricks/databricks-sdk-go/service/compute" +) + +type ArtifactType string + +const ArtifactPythonWheel ArtifactType = `whl` + +type ArtifactFile struct { + Source string `json:"source"` + RemotePath string `json:"-" bundle:"readonly"` + Libraries []*compute.Library `json:"-" bundle:"readonly"` +} // Artifact defines a single local code artifact that can be // built/uploaded/referenced in the context of this bundle. type Artifact struct { - Notebook *NotebookArtifact `json:"notebook,omitempty"` -} + Type ArtifactType `json:"type"` -type NotebookArtifact struct { + // The local path to the directory with a root of artifact, + // for example, where setup.py is for Python projects Path string `json:"path"` - // Language is detected during build step. - Language workspace.Language `json:"language,omitempty" bundle:"readonly"` - - // Paths are synthesized during build step. - LocalPath string `json:"local_path,omitempty" bundle:"readonly"` - RemotePath string `json:"remote_path,omitempty" bundle:"readonly"` + // The relative or absolute path to the built artifact files + // (Python wheel, Java jar and etc) itself + Files []ArtifactFile `json:"files"` + BuildCommand string `json:"build"` +} + +func (a *Artifact) Build(ctx context.Context) ([]byte, error) { + if a.BuildCommand == "" { + return nil, fmt.Errorf("no build property defined") + } + + buildParts := strings.Split(a.BuildCommand, " ") + cmd := exec.CommandContext(ctx, buildParts[0], buildParts[1:]...) + cmd.Dir = a.Path + return cmd.CombinedOutput() +} + +func (a *Artifact) NormalisePaths() { + for _, f := range a.Files { + // If no libraries attached, nothing to normalise, skipping + if f.Libraries == nil { + continue + } + + wsfsBase := "/Workspace" + remotePath := path.Join(wsfsBase, f.RemotePath) + for i := range f.Libraries { + lib := f.Libraries[i] + switch a.Type { + case ArtifactPythonWheel: + lib.Whl = remotePath + } + } + + } +} + +// This function determines if artifact files needs to be uploaded. +// During the bundle processing we analyse which library uses which artifact file. +// If artifact file is used as a library, we store the reference to this library in artifact file Libraries field. +// If artifact file has libraries it's been used in, it means than we need to upload this file. +// Otherwise this artifact file is not used and we skip uploading +func (af *ArtifactFile) NeedsUpload() bool { + return af.Libraries != nil } diff --git a/bundle/libraries/libraries.go b/bundle/libraries/libraries.go new file mode 100644 index 00000000..ff86a34b --- /dev/null +++ b/bundle/libraries/libraries.go @@ -0,0 +1,107 @@ +package libraries + +import ( + "context" + "fmt" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/jobs" +) + +type match struct { +} + +func MatchWithArtifacts() bundle.Mutator { + return &match{} +} + +func (a *match) Name() string { + return "libraries.MatchWithArtifacts" +} + +func (a *match) Apply(ctx context.Context, b *bundle.Bundle) error { + r := b.Config.Resources + for k := range b.Config.Resources.Jobs { + tasks := r.Jobs[k].JobSettings.Tasks + for i := range tasks { + task := &tasks[i] + if isMissingRequiredLibraries(task) { + return fmt.Errorf("task '%s' is missing required libraries. Please include your package code in task libraries block", task.TaskKey) + } + for j := range task.Libraries { + lib := &task.Libraries[j] + err := findArtifactsAndMarkForUpload(ctx, lib, b) + if err != nil { + return err + } + } + } + } + return nil +} + +func isMissingRequiredLibraries(task *jobs.Task) bool { + if task.Libraries != nil { + return false + } + + return task.PythonWheelTask != nil || task.SparkJarTask != nil +} + +func findLibraryMatches(lib *compute.Library, b *bundle.Bundle) ([]string, error) { + path := libPath(lib) + if path == "" { + return nil, nil + } + + fullPath := filepath.Join(b.Config.Path, path) + return filepath.Glob(fullPath) +} + +func findArtifactsAndMarkForUpload(ctx context.Context, lib *compute.Library, b *bundle.Bundle) error { + matches, err := findLibraryMatches(lib, b) + if err != nil { + return err + } + + for _, match := range matches { + af, err := findArtifactFileByLocalPath(match, b) + if err != nil { + cmdio.LogString(ctx, fmt.Sprintf("%s. Skipping %s. In order to use the library upload it manually", err.Error(), match)) + } else { + af.Libraries = append(af.Libraries, lib) + } + } + + return nil +} + +func findArtifactFileByLocalPath(path string, b *bundle.Bundle) (*config.ArtifactFile, error) { + for _, a := range b.Config.Artifacts { + for k := range a.Files { + if a.Files[k].Source == path { + return &a.Files[k], nil + } + } + } + + return nil, fmt.Errorf("artifact file is not found for path %s", path) +} + +func libPath(library *compute.Library) string { + if library.Whl != "" { + return library.Whl + } + if library.Jar != "" { + return library.Jar + } + if library.Egg != "" { + return library.Egg + } + + return "" +} diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index f2692ea9..8b53273c 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -6,6 +6,7 @@ import ( "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/terraform" + "github.com/databricks/cli/bundle/libraries" ) // The deploy phase deploys artifacts and resources. @@ -15,6 +16,8 @@ func Deploy() bundle.Mutator { bundle.Defer( bundle.Seq( files.Upload(), + libraries.MatchWithArtifacts(), + artifacts.CleanUp(), artifacts.UploadAll(), terraform.Interpolate(), terraform.Write(), diff --git a/bundle/tests/bundle/python_wheel/.gitignore b/bundle/tests/bundle/python_wheel/.gitignore new file mode 100644 index 00000000..f03e23bc --- /dev/null +++ b/bundle/tests/bundle/python_wheel/.gitignore @@ -0,0 +1,3 @@ +build/ +*.egg-info +.databricks diff --git a/bundle/tests/bundle/python_wheel/bundle.yml b/bundle/tests/bundle/python_wheel/bundle.yml new file mode 100644 index 00000000..9c518589 --- /dev/null +++ b/bundle/tests/bundle/python_wheel/bundle.yml @@ -0,0 +1,19 @@ +bundle: + name: python-wheel + +artifacts: + my_test_code: + type: whl + path: "./my_test_code" + build: "/usr/local/bin/python setup.py bdist_wheel" + +resources: + jobs: + test_job: + name: "[${bundle.environment}] My Wheel Job" + tasks: + - task_key: TestTask + existing_cluster_id: "0717-132531-5opeqon1" + python_wheel_task: + package_name: "my_test_code" + entry_point: "run" diff --git a/bundle/tests/bundle/python_wheel/my_test_code/setup.py b/bundle/tests/bundle/python_wheel/my_test_code/setup.py new file mode 100644 index 00000000..0bd871dd --- /dev/null +++ b/bundle/tests/bundle/python_wheel/my_test_code/setup.py @@ -0,0 +1,15 @@ +from setuptools import setup, find_packages + +import src + +setup( + name="my_test_code", + version=src.__version__, + author=src.__author__, + url="https://databricks.com", + author_email="john.doe@databricks.com", + description="my test wheel", + packages=find_packages(include=["src"]), + entry_points={"group_1": "run=src.__main__:main"}, + install_requires=["setuptools"], +) diff --git a/bundle/tests/bundle/python_wheel/my_test_code/src/__init__.py b/bundle/tests/bundle/python_wheel/my_test_code/src/__init__.py new file mode 100644 index 00000000..909f1f32 --- /dev/null +++ b/bundle/tests/bundle/python_wheel/my_test_code/src/__init__.py @@ -0,0 +1,2 @@ +__version__ = "0.0.1" +__author__ = "Databricks" diff --git a/bundle/tests/bundle/python_wheel/my_test_code/src/__main__.py b/bundle/tests/bundle/python_wheel/my_test_code/src/__main__.py new file mode 100644 index 00000000..73d045af --- /dev/null +++ b/bundle/tests/bundle/python_wheel/my_test_code/src/__main__.py @@ -0,0 +1,16 @@ +""" +The entry point of the Python Wheel +""" + +import sys + + +def main(): + # This method will print the provided arguments + print('Hello from my func') + print('Got arguments:') + print(sys.argv) + + +if __name__ == '__main__': + main() diff --git a/bundle/tests/bundle/wheel_test.go b/bundle/tests/bundle/wheel_test.go new file mode 100644 index 00000000..9a6b2fd2 --- /dev/null +++ b/bundle/tests/bundle/wheel_test.go @@ -0,0 +1,26 @@ +package bundle + +import ( + "context" + "os" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/phases" + "github.com/databricks/cli/internal" + "github.com/stretchr/testify/require" +) + +func TestAccBundlePythonWheelBuild(t *testing.T) { + t.Log(internal.GetEnvOrSkipTest(t, "CLOUD_ENV")) + + b, err := bundle.Load("./python_wheel") + require.NoError(t, err) + + m := phases.Build() + err = m.Apply(context.Background(), b) + require.NoError(t, err) + + _, err = os.Stat("./python_wheel/my_test_code/dist/my_test_code-0.0.1-py2-none-any.whl") + require.NoError(t, err) +} diff --git a/python/utils.go b/python/utils.go new file mode 100644 index 00000000..10654edc --- /dev/null +++ b/python/utils.go @@ -0,0 +1,48 @@ +package python + +// TODO: move this package into the libs + +import ( + "context" + "os" + "path" + "strings" + + "github.com/databricks/cli/libs/log" +) + +func CleanupWheelFolder(dir string) { + // there or not there - we don't care + os.RemoveAll(path.Join(dir, "__pycache__")) + os.RemoveAll(path.Join(dir, "build")) + eggInfo := FindFilesWithSuffixInPath(dir, ".egg-info") + if len(eggInfo) == 0 { + return + } + for _, f := range eggInfo { + os.RemoveAll(f) + } +} + +func FindFilesWithSuffixInPath(dir, suffix string) []string { + f, err := os.Open(dir) + if err != nil { + log.Debugf(context.Background(), "open dir %s: %s", dir, err) + return nil + } + entries, err := f.ReadDir(0) + if err != nil { + log.Debugf(context.Background(), "read dir %s: %s", dir, err) + // todo: log + return nil + } + + files := make([]string, 0) + for _, child := range entries { + if !strings.HasSuffix(child.Name(), suffix) { + continue + } + files = append(files, path.Join(dir, child.Name())) + } + return files +} diff --git a/python/wheel.go b/python/wheel.go index ff05509d..39c3d4cb 100644 --- a/python/wheel.go +++ b/python/wheel.go @@ -6,7 +6,6 @@ import ( "io" "os" "path" - "strings" "github.com/databricks/cli/libs/log" "github.com/databricks/databricks-sdk-go" @@ -18,7 +17,7 @@ func BuildWheel(ctx context.Context, dir string) (string, error) { // remove previous dist leak os.RemoveAll("dist") // remove all other irrelevant traces - silentlyCleanupWheelFolder(".") + CleanupWheelFolder(".") // call simple wheel builder. we may need to pip install wheel as well out, err := Py(ctx, "setup.py", "bdist_wheel") if err != nil { @@ -27,13 +26,16 @@ func BuildWheel(ctx context.Context, dir string) (string, error) { log.Debugf(ctx, "Built wheel: %s", out) // and cleanup afterwards - silentlyCleanupWheelFolder(".") + CleanupWheelFolder(".") - wheel := silentChildWithSuffix("dist", ".whl") - if wheel == "" { + wheels := FindFilesWithSuffixInPath("dist", ".whl") + if len(wheels) == 0 { return "", fmt.Errorf("cannot find built wheel in %s", dir) } - return path.Join(dir, wheel), nil + if len(wheels) != 1 { + return "", fmt.Errorf("more than 1 wheel file found in %s", dir) + } + return path.Join(dir, wheels[0]), nil } const DBFSWheelLocation = "dbfs:/FileStore/wheels/simple" @@ -82,38 +84,6 @@ func UploadWheelToDBFSWithPEP503(ctx context.Context, dir string) (string, error return dbfsLoc, err } -func silentlyCleanupWheelFolder(dir string) { - // there or not there - we don't care - os.RemoveAll(path.Join(dir, "__pycache__")) - os.RemoveAll(path.Join(dir, "build")) - eggInfo := silentChildWithSuffix(dir, ".egg-info") - if eggInfo == "" { - return - } - os.RemoveAll(eggInfo) -} - -func silentChildWithSuffix(dir, suffix string) string { - f, err := os.Open(dir) - if err != nil { - log.Debugf(context.Background(), "open dir %s: %s", dir, err) - return "" - } - entries, err := f.ReadDir(0) - if err != nil { - log.Debugf(context.Background(), "read dir %s: %s", dir, err) - // todo: log - return "" - } - for _, child := range entries { - if !strings.HasSuffix(child.Name(), suffix) { - continue - } - return path.Join(dir, child.Name()) - } - return "" -} - func chdirAndBack(dir string) func() { wd, _ := os.Getwd() os.Chdir(dir)