diff --git a/bundle/artifacts/artifacts.go b/bundle/artifacts/artifacts.go new file mode 100644 index 00000000..c5413121 --- /dev/null +++ b/bundle/artifacts/artifacts.go @@ -0,0 +1,167 @@ +package artifacts + +import ( + "context" + "crypto/sha256" + "encoding/base64" + "errors" + "fmt" + "os" + "path" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/artifacts/whl" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go/service/workspace" +) + +type mutatorFactory = func(name string) bundle.Mutator + +var buildMutators map[config.ArtifactType]mutatorFactory = map[config.ArtifactType]mutatorFactory{ + config.ArtifactPythonWheel: whl.Build, +} + +var uploadMutators map[config.ArtifactType]mutatorFactory = map[config.ArtifactType]mutatorFactory{} + +func getBuildMutator(t config.ArtifactType, name string) bundle.Mutator { + mutatorFactory, ok := buildMutators[t] + if !ok { + mutatorFactory = BasicBuild + } + + return mutatorFactory(name) +} + +func getUploadMutator(t config.ArtifactType, name string) bundle.Mutator { + mutatorFactory, ok := uploadMutators[t] + if !ok { + mutatorFactory = BasicUpload + } + + return mutatorFactory(name) +} + +// Basic Build defines a general build mutator which builds artifact based on artifact.BuildCommand +type basicBuild struct { + name string +} + +func BasicBuild(name string) bundle.Mutator { + return &basicBuild{name: name} +} + +func (m *basicBuild) Name() string { + return fmt.Sprintf("artifacts.Build(%s)", m.name) +} + +func (m *basicBuild) Apply(ctx context.Context, b *bundle.Bundle) error { + artifact, ok := b.Config.Artifacts[m.name] + if !ok { + return fmt.Errorf("artifact doesn't exist: %s", m.name) + } + + cmdio.LogString(ctx, fmt.Sprintf("artifacts.Build(%s): Building...", m.name)) + + out, err := artifact.Build(ctx) + if err != nil { + return fmt.Errorf("artifacts.Build(%s): %w, output: %s", m.name, err, out) + } + cmdio.LogString(ctx, fmt.Sprintf("artifacts.Build(%s): Build succeeded", m.name)) + + return nil +} + +// Basic Upload defines a general upload mutator which uploads artifact as a library to workspace +type basicUpload struct { + name string +} + +func BasicUpload(name string) bundle.Mutator { + return &basicUpload{name: name} +} + +func (m *basicUpload) Name() string { + return fmt.Sprintf("artifacts.Build(%s)", m.name) +} + +func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error { + artifact, ok := b.Config.Artifacts[m.name] + if !ok { + return fmt.Errorf("artifact doesn't exist: %s", m.name) + } + + if len(artifact.Files) == 0 { + return fmt.Errorf("artifact source is not configured: %s", m.name) + } + + err := uploadArtifact(ctx, artifact, b) + if err != nil { + return fmt.Errorf("artifacts.Upload(%s): %w", m.name, err) + } + + return nil +} + +func uploadArtifact(ctx context.Context, a *config.Artifact, b *bundle.Bundle) error { + for i := range a.Files { + f := &a.Files[i] + if f.NeedsUpload() { + filename := path.Base(f.Source) + cmdio.LogString(ctx, fmt.Sprintf("artifacts.Upload(%s): Uploading...", filename)) + remotePath, err := uploadArtifactFile(ctx, f.Source, b) + if err != nil { + return err + } + cmdio.LogString(ctx, fmt.Sprintf("artifacts.Upload(%s): Upload succeeded", filename)) + + f.RemotePath = remotePath + } + } + + a.NormalisePaths() + return nil +} + +// Function to upload artifact file to Workspace +func uploadArtifactFile(ctx context.Context, file string, b *bundle.Bundle) (string, error) { + raw, err := os.ReadFile(file) + if err != nil { + return "", fmt.Errorf("unable to read %s: %w", file, errors.Unwrap(err)) + } + + uploadPath, err := getUploadBasePath(b) + if err != nil { + return "", err + } + + fileHash := sha256.Sum256(raw) + remotePath := path.Join(uploadPath, fmt.Sprintf("%x", fileHash), path.Base(file)) + // Make sure target directory exists. + err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, path.Dir(remotePath)) + if err != nil { + return "", fmt.Errorf("unable to create directory for %s: %w", remotePath, err) + } + + // Import to workspace. + err = b.WorkspaceClient().Workspace.Import(ctx, workspace.Import{ + Path: remotePath, + Overwrite: true, + Format: workspace.ImportFormatAuto, + Content: base64.StdEncoding.EncodeToString(raw), + }) + if err != nil { + return "", fmt.Errorf("unable to import %s: %w", remotePath, err) + } + + return remotePath, nil +} + +func getUploadBasePath(b *bundle.Bundle) (string, error) { + artifactPath := b.Config.Workspace.ArtifactsPath + if artifactPath == "" { + return "", fmt.Errorf("remote artifact path not configured") + } + + return path.Join(artifactPath, ".internal"), nil +} diff --git a/bundle/artifacts/build.go b/bundle/artifacts/build.go index 294351f4..7721635a 100644 --- a/bundle/artifacts/build.go +++ b/bundle/artifacts/build.go @@ -3,9 +3,9 @@ package artifacts import ( "context" "fmt" + "path/filepath" "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/artifacts/notebook" ) func BuildAll() bundle.Mutator { @@ -33,9 +33,23 @@ func (m *build) Apply(ctx context.Context, b *bundle.Bundle) error { return fmt.Errorf("artifact doesn't exist: %s", m.name) } - if artifact.Notebook != nil { - return bundle.Apply(ctx, b, notebook.Build(m.name)) + if len(artifact.Files) == 0 && artifact.BuildCommand == "" { + return fmt.Errorf("artifact %s misconfigured: 'files' or 'build' property is required", m.name) } - return nil + // If artifact file is explicitly defined, skip building the artifact + if len(artifact.Files) != 0 { + return nil + } + + // If artifact path is not provided, use bundle root dir + if artifact.Path == "" { + artifact.Path = b.Config.Path + } + + if !filepath.IsAbs(artifact.Path) { + artifact.Path = filepath.Join(b.Config.Path, artifact.Path) + } + + return bundle.Apply(ctx, b, getBuildMutator(artifact.Type, m.name)) } diff --git a/bundle/artifacts/notebook/build.go b/bundle/artifacts/notebook/build.go deleted file mode 100644 index 4a25868a..00000000 --- a/bundle/artifacts/notebook/build.go +++ /dev/null @@ -1,81 +0,0 @@ -package notebook - -import ( - "context" - "errors" - "fmt" - "os" - "path" - "path/filepath" - "strings" - - "github.com/databricks/cli/bundle" - "github.com/databricks/databricks-sdk-go/service/workspace" -) - -type build struct { - name string -} - -func Build(name string) bundle.Mutator { - return &build{ - name: name, - } -} - -func (m *build) Name() string { - return fmt.Sprintf("notebook.Build(%s)", m.name) -} - -func (m *build) Apply(_ context.Context, b *bundle.Bundle) error { - a, ok := b.Config.Artifacts[m.name] - if !ok { - return fmt.Errorf("artifact doesn't exist: %s", m.name) - } - - artifact := a.Notebook - - // Check if the filetype is supported. - switch ext := strings.ToLower(filepath.Ext(artifact.Path)); ext { - case ".py": - artifact.Language = workspace.LanguagePython - case ".scala": - artifact.Language = workspace.LanguageScala - case ".sql": - artifact.Language = workspace.LanguageSql - default: - return fmt.Errorf("invalid notebook extension: %s", ext) - } - - // Open underlying file. - f, err := os.Open(filepath.Join(b.Config.Path, artifact.Path)) - if err != nil { - return fmt.Errorf("unable to open artifact file %s: %w", artifact.Path, errors.Unwrap(err)) - } - defer f.Close() - - // Check that the file contains the notebook marker on its first line. - ok, err = hasMarker(artifact.Language, f) - if err != nil { - return fmt.Errorf("unable to read artifact file %s: %s", artifact.Path, errors.Unwrap(err)) - } - if !ok { - return fmt.Errorf("notebook marker not found in %s", artifact.Path) - } - - // Check that an artifact path is defined. - remotePath := b.Config.Workspace.ArtifactsPath - if remotePath == "" { - return fmt.Errorf("remote artifact path not configured") - } - - // Store absolute paths. - artifact.LocalPath = filepath.Join(b.Config.Path, artifact.Path) - artifact.RemotePath = path.Join(remotePath, stripExtension(artifact.Path)) - return nil -} - -func stripExtension(path string) string { - ext := filepath.Ext(path) - return path[0 : len(path)-len(ext)] -} diff --git a/bundle/artifacts/notebook/marker.go b/bundle/artifacts/notebook/marker.go deleted file mode 100644 index a04ca989..00000000 --- a/bundle/artifacts/notebook/marker.go +++ /dev/null @@ -1,29 +0,0 @@ -package notebook - -import ( - "bufio" - "io" - "strings" - - "github.com/databricks/databricks-sdk-go/service/workspace" -) - -func hasMarker(l workspace.Language, r io.Reader) (bool, error) { - scanner := bufio.NewScanner(r) - ok := scanner.Scan() - if !ok { - return false, scanner.Err() - } - - line := strings.TrimSpace(scanner.Text()) - switch l { - case workspace.LanguagePython: - return line == "# Databricks notebook source", nil - case workspace.LanguageScala: - return line == "// Databricks notebook source", nil - case workspace.LanguageSql: - return line == "-- Databricks notebook source", nil - default: - panic("language not handled: " + l) - } -} diff --git a/bundle/artifacts/notebook/upload.go b/bundle/artifacts/notebook/upload.go deleted file mode 100644 index 38ac9d61..00000000 --- a/bundle/artifacts/notebook/upload.go +++ /dev/null @@ -1,60 +0,0 @@ -package notebook - -import ( - "context" - "encoding/base64" - "errors" - "fmt" - "os" - "path" - - "github.com/databricks/cli/bundle" - "github.com/databricks/databricks-sdk-go/service/workspace" -) - -type upload struct { - name string -} - -func Upload(name string) bundle.Mutator { - return &upload{ - name: name, - } -} - -func (m *upload) Name() string { - return fmt.Sprintf("notebook.Upload(%s)", m.name) -} - -func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) error { - a, ok := b.Config.Artifacts[m.name] - if !ok { - return fmt.Errorf("artifact doesn't exist: %s", m.name) - } - - artifact := a.Notebook - raw, err := os.ReadFile(artifact.LocalPath) - if err != nil { - return fmt.Errorf("unable to read %s: %w", m.name, errors.Unwrap(err)) - } - - // Make sure target directory exists. - err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, path.Dir(artifact.RemotePath)) - if err != nil { - return fmt.Errorf("unable to create directory for %s: %w", m.name, err) - } - - // Import to workspace. - err = b.WorkspaceClient().Workspace.Import(ctx, workspace.Import{ - Path: artifact.RemotePath, - Overwrite: true, - Format: workspace.ImportFormatSource, - Language: artifact.Language, - Content: base64.StdEncoding.EncodeToString(raw), - }) - if err != nil { - return fmt.Errorf("unable to import %s: %w", m.name, err) - } - - return nil -} diff --git a/bundle/artifacts/upload.go b/bundle/artifacts/upload.go index f5ce2b23..990718aa 100644 --- a/bundle/artifacts/upload.go +++ b/bundle/artifacts/upload.go @@ -5,7 +5,7 @@ import ( "fmt" "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/artifacts/notebook" + "github.com/databricks/databricks-sdk-go/service/workspace" ) func UploadAll() bundle.Mutator { @@ -15,6 +15,10 @@ func UploadAll() bundle.Mutator { } } +func CleanUp() bundle.Mutator { + return &cleanUp{} +} + type upload struct { name string } @@ -33,8 +37,33 @@ func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) error { return fmt.Errorf("artifact doesn't exist: %s", m.name) } - if artifact.Notebook != nil { - return bundle.Apply(ctx, b, notebook.Upload(m.name)) + if len(artifact.Files) == 0 { + return fmt.Errorf("artifact source is not configured: %s", m.name) + } + + return bundle.Apply(ctx, b, getUploadMutator(artifact.Type, m.name)) +} + +type cleanUp struct{} + +func (m *cleanUp) Name() string { + return "artifacts.CleanUp" +} + +func (m *cleanUp) Apply(ctx context.Context, b *bundle.Bundle) error { + uploadPath, err := getUploadBasePath(b) + if err != nil { + return err + } + + b.WorkspaceClient().Workspace.Delete(ctx, workspace.Delete{ + Path: uploadPath, + Recursive: true, + }) + + err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, uploadPath) + if err != nil { + return fmt.Errorf("unable to create directory for %s: %w", uploadPath, err) } return nil diff --git a/bundle/artifacts/whl/build.go b/bundle/artifacts/whl/build.go new file mode 100644 index 00000000..4ee47153 --- /dev/null +++ b/bundle/artifacts/whl/build.go @@ -0,0 +1,66 @@ +package whl + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/cli/python" +) + +type build struct { + name string +} + +func Build(name string) bundle.Mutator { + return &build{ + name: name, + } +} + +func (m *build) Name() string { + return fmt.Sprintf("artifacts.whl.Build(%s)", m.name) +} + +func (m *build) Apply(ctx context.Context, b *bundle.Bundle) error { + artifact, ok := b.Config.Artifacts[m.name] + if !ok { + return fmt.Errorf("artifact doesn't exist: %s", m.name) + } + + // TODO: If not set, BuildCommand should be infer prior to this + // via a mutator so that it can be observable. + if artifact.BuildCommand == "" { + return fmt.Errorf("artifacts.whl.Build(%s): missing build property for the artifact", m.name) + } + + cmdio.LogString(ctx, fmt.Sprintf("artifacts.whl.Build(%s): Building...", m.name)) + + dir := artifact.Path + + distPath := filepath.Join(dir, "dist") + os.RemoveAll(distPath) + python.CleanupWheelFolder(dir) + + out, err := artifact.Build(ctx) + if err != nil { + return fmt.Errorf("artifacts.whl.Build(%s): Failed %w, output: %s", m.name, err, out) + } + cmdio.LogString(ctx, fmt.Sprintf("artifacts.whl.Build(%s): Build succeeded", m.name)) + + wheels := python.FindFilesWithSuffixInPath(distPath, ".whl") + if len(wheels) == 0 { + return fmt.Errorf("artifacts.whl.Build(%s): cannot find built wheel in %s", m.name, dir) + } + for _, wheel := range wheels { + artifact.Files = append(artifact.Files, config.ArtifactFile{ + Source: wheel, + }) + } + + return nil +} diff --git a/bundle/config/artifact.go b/bundle/config/artifact.go index f782fcfc..1ac371e9 100644 --- a/bundle/config/artifact.go +++ b/bundle/config/artifact.go @@ -1,20 +1,76 @@ package config -import "github.com/databricks/databricks-sdk-go/service/workspace" +import ( + "context" + "fmt" + "os/exec" + "path" + "strings" + + "github.com/databricks/databricks-sdk-go/service/compute" +) + +type ArtifactType string + +const ArtifactPythonWheel ArtifactType = `whl` + +type ArtifactFile struct { + Source string `json:"source"` + RemotePath string `json:"-" bundle:"readonly"` + Libraries []*compute.Library `json:"-" bundle:"readonly"` +} // Artifact defines a single local code artifact that can be // built/uploaded/referenced in the context of this bundle. type Artifact struct { - Notebook *NotebookArtifact `json:"notebook,omitempty"` -} + Type ArtifactType `json:"type"` -type NotebookArtifact struct { + // The local path to the directory with a root of artifact, + // for example, where setup.py is for Python projects Path string `json:"path"` - // Language is detected during build step. - Language workspace.Language `json:"language,omitempty" bundle:"readonly"` - - // Paths are synthesized during build step. - LocalPath string `json:"local_path,omitempty" bundle:"readonly"` - RemotePath string `json:"remote_path,omitempty" bundle:"readonly"` + // The relative or absolute path to the built artifact files + // (Python wheel, Java jar and etc) itself + Files []ArtifactFile `json:"files"` + BuildCommand string `json:"build"` +} + +func (a *Artifact) Build(ctx context.Context) ([]byte, error) { + if a.BuildCommand == "" { + return nil, fmt.Errorf("no build property defined") + } + + buildParts := strings.Split(a.BuildCommand, " ") + cmd := exec.CommandContext(ctx, buildParts[0], buildParts[1:]...) + cmd.Dir = a.Path + return cmd.CombinedOutput() +} + +func (a *Artifact) NormalisePaths() { + for _, f := range a.Files { + // If no libraries attached, nothing to normalise, skipping + if f.Libraries == nil { + continue + } + + wsfsBase := "/Workspace" + remotePath := path.Join(wsfsBase, f.RemotePath) + for i := range f.Libraries { + lib := f.Libraries[i] + switch a.Type { + case ArtifactPythonWheel: + lib.Whl = remotePath + } + } + + } +} + +// This function determines if artifact files needs to be uploaded. +// During the bundle processing we analyse which library uses which artifact file. +// If artifact file is used as a library, we store the reference to this library in artifact file Libraries field. +// If artifact file has libraries it's been used in, it means than we need to upload this file. +// Otherwise this artifact file is not used and we skip uploading +func (af *ArtifactFile) NeedsUpload() bool { + return af.Libraries != nil } diff --git a/bundle/libraries/libraries.go b/bundle/libraries/libraries.go new file mode 100644 index 00000000..ff86a34b --- /dev/null +++ b/bundle/libraries/libraries.go @@ -0,0 +1,107 @@ +package libraries + +import ( + "context" + "fmt" + "path/filepath" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/libs/cmdio" + "github.com/databricks/databricks-sdk-go/service/compute" + "github.com/databricks/databricks-sdk-go/service/jobs" +) + +type match struct { +} + +func MatchWithArtifacts() bundle.Mutator { + return &match{} +} + +func (a *match) Name() string { + return "libraries.MatchWithArtifacts" +} + +func (a *match) Apply(ctx context.Context, b *bundle.Bundle) error { + r := b.Config.Resources + for k := range b.Config.Resources.Jobs { + tasks := r.Jobs[k].JobSettings.Tasks + for i := range tasks { + task := &tasks[i] + if isMissingRequiredLibraries(task) { + return fmt.Errorf("task '%s' is missing required libraries. Please include your package code in task libraries block", task.TaskKey) + } + for j := range task.Libraries { + lib := &task.Libraries[j] + err := findArtifactsAndMarkForUpload(ctx, lib, b) + if err != nil { + return err + } + } + } + } + return nil +} + +func isMissingRequiredLibraries(task *jobs.Task) bool { + if task.Libraries != nil { + return false + } + + return task.PythonWheelTask != nil || task.SparkJarTask != nil +} + +func findLibraryMatches(lib *compute.Library, b *bundle.Bundle) ([]string, error) { + path := libPath(lib) + if path == "" { + return nil, nil + } + + fullPath := filepath.Join(b.Config.Path, path) + return filepath.Glob(fullPath) +} + +func findArtifactsAndMarkForUpload(ctx context.Context, lib *compute.Library, b *bundle.Bundle) error { + matches, err := findLibraryMatches(lib, b) + if err != nil { + return err + } + + for _, match := range matches { + af, err := findArtifactFileByLocalPath(match, b) + if err != nil { + cmdio.LogString(ctx, fmt.Sprintf("%s. Skipping %s. In order to use the library upload it manually", err.Error(), match)) + } else { + af.Libraries = append(af.Libraries, lib) + } + } + + return nil +} + +func findArtifactFileByLocalPath(path string, b *bundle.Bundle) (*config.ArtifactFile, error) { + for _, a := range b.Config.Artifacts { + for k := range a.Files { + if a.Files[k].Source == path { + return &a.Files[k], nil + } + } + } + + return nil, fmt.Errorf("artifact file is not found for path %s", path) +} + +func libPath(library *compute.Library) string { + if library.Whl != "" { + return library.Whl + } + if library.Jar != "" { + return library.Jar + } + if library.Egg != "" { + return library.Egg + } + + return "" +} diff --git a/bundle/phases/deploy.go b/bundle/phases/deploy.go index f2692ea9..8b53273c 100644 --- a/bundle/phases/deploy.go +++ b/bundle/phases/deploy.go @@ -6,6 +6,7 @@ import ( "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/bundle/deploy/lock" "github.com/databricks/cli/bundle/deploy/terraform" + "github.com/databricks/cli/bundle/libraries" ) // The deploy phase deploys artifacts and resources. @@ -15,6 +16,8 @@ func Deploy() bundle.Mutator { bundle.Defer( bundle.Seq( files.Upload(), + libraries.MatchWithArtifacts(), + artifacts.CleanUp(), artifacts.UploadAll(), terraform.Interpolate(), terraform.Write(), diff --git a/bundle/tests/bundle/python_wheel/.gitignore b/bundle/tests/bundle/python_wheel/.gitignore new file mode 100644 index 00000000..f03e23bc --- /dev/null +++ b/bundle/tests/bundle/python_wheel/.gitignore @@ -0,0 +1,3 @@ +build/ +*.egg-info +.databricks diff --git a/bundle/tests/bundle/python_wheel/bundle.yml b/bundle/tests/bundle/python_wheel/bundle.yml new file mode 100644 index 00000000..9c518589 --- /dev/null +++ b/bundle/tests/bundle/python_wheel/bundle.yml @@ -0,0 +1,19 @@ +bundle: + name: python-wheel + +artifacts: + my_test_code: + type: whl + path: "./my_test_code" + build: "/usr/local/bin/python setup.py bdist_wheel" + +resources: + jobs: + test_job: + name: "[${bundle.environment}] My Wheel Job" + tasks: + - task_key: TestTask + existing_cluster_id: "0717-132531-5opeqon1" + python_wheel_task: + package_name: "my_test_code" + entry_point: "run" diff --git a/bundle/tests/bundle/python_wheel/my_test_code/setup.py b/bundle/tests/bundle/python_wheel/my_test_code/setup.py new file mode 100644 index 00000000..0bd871dd --- /dev/null +++ b/bundle/tests/bundle/python_wheel/my_test_code/setup.py @@ -0,0 +1,15 @@ +from setuptools import setup, find_packages + +import src + +setup( + name="my_test_code", + version=src.__version__, + author=src.__author__, + url="https://databricks.com", + author_email="john.doe@databricks.com", + description="my test wheel", + packages=find_packages(include=["src"]), + entry_points={"group_1": "run=src.__main__:main"}, + install_requires=["setuptools"], +) diff --git a/bundle/tests/bundle/python_wheel/my_test_code/src/__init__.py b/bundle/tests/bundle/python_wheel/my_test_code/src/__init__.py new file mode 100644 index 00000000..909f1f32 --- /dev/null +++ b/bundle/tests/bundle/python_wheel/my_test_code/src/__init__.py @@ -0,0 +1,2 @@ +__version__ = "0.0.1" +__author__ = "Databricks" diff --git a/bundle/tests/bundle/python_wheel/my_test_code/src/__main__.py b/bundle/tests/bundle/python_wheel/my_test_code/src/__main__.py new file mode 100644 index 00000000..73d045af --- /dev/null +++ b/bundle/tests/bundle/python_wheel/my_test_code/src/__main__.py @@ -0,0 +1,16 @@ +""" +The entry point of the Python Wheel +""" + +import sys + + +def main(): + # This method will print the provided arguments + print('Hello from my func') + print('Got arguments:') + print(sys.argv) + + +if __name__ == '__main__': + main() diff --git a/bundle/tests/bundle/wheel_test.go b/bundle/tests/bundle/wheel_test.go new file mode 100644 index 00000000..9a6b2fd2 --- /dev/null +++ b/bundle/tests/bundle/wheel_test.go @@ -0,0 +1,26 @@ +package bundle + +import ( + "context" + "os" + "testing" + + "github.com/databricks/cli/bundle" + "github.com/databricks/cli/bundle/phases" + "github.com/databricks/cli/internal" + "github.com/stretchr/testify/require" +) + +func TestAccBundlePythonWheelBuild(t *testing.T) { + t.Log(internal.GetEnvOrSkipTest(t, "CLOUD_ENV")) + + b, err := bundle.Load("./python_wheel") + require.NoError(t, err) + + m := phases.Build() + err = m.Apply(context.Background(), b) + require.NoError(t, err) + + _, err = os.Stat("./python_wheel/my_test_code/dist/my_test_code-0.0.1-py2-none-any.whl") + require.NoError(t, err) +} diff --git a/python/utils.go b/python/utils.go new file mode 100644 index 00000000..10654edc --- /dev/null +++ b/python/utils.go @@ -0,0 +1,48 @@ +package python + +// TODO: move this package into the libs + +import ( + "context" + "os" + "path" + "strings" + + "github.com/databricks/cli/libs/log" +) + +func CleanupWheelFolder(dir string) { + // there or not there - we don't care + os.RemoveAll(path.Join(dir, "__pycache__")) + os.RemoveAll(path.Join(dir, "build")) + eggInfo := FindFilesWithSuffixInPath(dir, ".egg-info") + if len(eggInfo) == 0 { + return + } + for _, f := range eggInfo { + os.RemoveAll(f) + } +} + +func FindFilesWithSuffixInPath(dir, suffix string) []string { + f, err := os.Open(dir) + if err != nil { + log.Debugf(context.Background(), "open dir %s: %s", dir, err) + return nil + } + entries, err := f.ReadDir(0) + if err != nil { + log.Debugf(context.Background(), "read dir %s: %s", dir, err) + // todo: log + return nil + } + + files := make([]string, 0) + for _, child := range entries { + if !strings.HasSuffix(child.Name(), suffix) { + continue + } + files = append(files, path.Join(dir, child.Name())) + } + return files +} diff --git a/python/wheel.go b/python/wheel.go index ff05509d..39c3d4cb 100644 --- a/python/wheel.go +++ b/python/wheel.go @@ -6,7 +6,6 @@ import ( "io" "os" "path" - "strings" "github.com/databricks/cli/libs/log" "github.com/databricks/databricks-sdk-go" @@ -18,7 +17,7 @@ func BuildWheel(ctx context.Context, dir string) (string, error) { // remove previous dist leak os.RemoveAll("dist") // remove all other irrelevant traces - silentlyCleanupWheelFolder(".") + CleanupWheelFolder(".") // call simple wheel builder. we may need to pip install wheel as well out, err := Py(ctx, "setup.py", "bdist_wheel") if err != nil { @@ -27,13 +26,16 @@ func BuildWheel(ctx context.Context, dir string) (string, error) { log.Debugf(ctx, "Built wheel: %s", out) // and cleanup afterwards - silentlyCleanupWheelFolder(".") + CleanupWheelFolder(".") - wheel := silentChildWithSuffix("dist", ".whl") - if wheel == "" { + wheels := FindFilesWithSuffixInPath("dist", ".whl") + if len(wheels) == 0 { return "", fmt.Errorf("cannot find built wheel in %s", dir) } - return path.Join(dir, wheel), nil + if len(wheels) != 1 { + return "", fmt.Errorf("more than 1 wheel file found in %s", dir) + } + return path.Join(dir, wheels[0]), nil } const DBFSWheelLocation = "dbfs:/FileStore/wheels/simple" @@ -82,38 +84,6 @@ func UploadWheelToDBFSWithPEP503(ctx context.Context, dir string) (string, error return dbfsLoc, err } -func silentlyCleanupWheelFolder(dir string) { - // there or not there - we don't care - os.RemoveAll(path.Join(dir, "__pycache__")) - os.RemoveAll(path.Join(dir, "build")) - eggInfo := silentChildWithSuffix(dir, ".egg-info") - if eggInfo == "" { - return - } - os.RemoveAll(eggInfo) -} - -func silentChildWithSuffix(dir, suffix string) string { - f, err := os.Open(dir) - if err != nil { - log.Debugf(context.Background(), "open dir %s: %s", dir, err) - return "" - } - entries, err := f.ReadDir(0) - if err != nil { - log.Debugf(context.Background(), "read dir %s: %s", dir, err) - // todo: log - return "" - } - for _, child := range entries { - if !strings.HasSuffix(child.Name(), suffix) { - continue - } - return path.Join(dir, child.Name()) - } - return "" -} - func chdirAndBack(dir string) func() { wd, _ := os.Getwd() os.Chdir(dir)