mirror of https://github.com/databricks/cli.git
Added support for artifacts building for bundles (#583)
## Changes Added support for artifacts building for bundles. Now it allows to specify `artifacts` block in bundle.yml and define a resource (at the moment Python wheel) to be built and uploaded during `bundle deploy`. The built artifact will be automatically attached to the corresponding job task or pipeline where it's used as a library. Follow-ups: 1. If an artifact is used in a job or pipeline, but not found in the config, try to infer and build it anyway 2. If a build command is not provided for a Python wheel artifact, infer it
This commit is contained in:
parent
fa37449f1f
commit
9a88fa602d
|
@ -0,0 +1,167 @@
|
|||
package artifacts
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/cli/bundle/artifacts/whl"
|
||||
"github.com/databricks/cli/bundle/config"
|
||||
"github.com/databricks/cli/libs/cmdio"
|
||||
"github.com/databricks/databricks-sdk-go/service/workspace"
|
||||
)
|
||||
|
||||
// mutatorFactory constructs a bundle.Mutator for the artifact with the given name.
type mutatorFactory = func(name string) bundle.Mutator
|
||||
|
||||
// buildMutators maps artifact types to their specialized build mutator
// factories. Types without an entry fall back to BasicBuild.
var buildMutators map[config.ArtifactType]mutatorFactory = map[config.ArtifactType]mutatorFactory{
	config.ArtifactPythonWheel: whl.Build,
}

// uploadMutators maps artifact types to specialized upload mutator
// factories. Currently empty: every artifact type falls back to BasicUpload.
var uploadMutators map[config.ArtifactType]mutatorFactory = map[config.ArtifactType]mutatorFactory{}
|
||||
|
||||
func getBuildMutator(t config.ArtifactType, name string) bundle.Mutator {
|
||||
mutatorFactory, ok := buildMutators[t]
|
||||
if !ok {
|
||||
mutatorFactory = BasicBuild
|
||||
}
|
||||
|
||||
return mutatorFactory(name)
|
||||
}
|
||||
|
||||
func getUploadMutator(t config.ArtifactType, name string) bundle.Mutator {
|
||||
mutatorFactory, ok := uploadMutators[t]
|
||||
if !ok {
|
||||
mutatorFactory = BasicUpload
|
||||
}
|
||||
|
||||
return mutatorFactory(name)
|
||||
}
|
||||
|
||||
// basicBuild is a general-purpose build mutator that builds the named
// artifact by running its configured build command (artifact.BuildCommand).
type basicBuild struct {
	// name is the key of the artifact in the bundle configuration.
	name string
}

// BasicBuild returns a build mutator for the named artifact.
func BasicBuild(name string) bundle.Mutator {
	return &basicBuild{name: name}
}
|
||||
|
||||
func (m *basicBuild) Name() string {
|
||||
return fmt.Sprintf("artifacts.Build(%s)", m.name)
|
||||
}
|
||||
|
||||
func (m *basicBuild) Apply(ctx context.Context, b *bundle.Bundle) error {
|
||||
artifact, ok := b.Config.Artifacts[m.name]
|
||||
if !ok {
|
||||
return fmt.Errorf("artifact doesn't exist: %s", m.name)
|
||||
}
|
||||
|
||||
cmdio.LogString(ctx, fmt.Sprintf("artifacts.Build(%s): Building...", m.name))
|
||||
|
||||
out, err := artifact.Build(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("artifacts.Build(%s): %w, output: %s", m.name, err, out)
|
||||
}
|
||||
cmdio.LogString(ctx, fmt.Sprintf("artifacts.Build(%s): Build succeeded", m.name))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// basicUpload is a general-purpose upload mutator that uploads the named
// artifact's files to the workspace as libraries.
type basicUpload struct {
	// name is the key of the artifact in the bundle configuration.
	name string
}

// BasicUpload returns an upload mutator for the named artifact.
func BasicUpload(name string) bundle.Mutator {
	return &basicUpload{name: name}
}
|
||||
|
||||
func (m *basicUpload) Name() string {
|
||||
return fmt.Sprintf("artifacts.Build(%s)", m.name)
|
||||
}
|
||||
|
||||
func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error {
|
||||
artifact, ok := b.Config.Artifacts[m.name]
|
||||
if !ok {
|
||||
return fmt.Errorf("artifact doesn't exist: %s", m.name)
|
||||
}
|
||||
|
||||
if len(artifact.Files) == 0 {
|
||||
return fmt.Errorf("artifact source is not configured: %s", m.name)
|
||||
}
|
||||
|
||||
err := uploadArtifact(ctx, artifact, b)
|
||||
if err != nil {
|
||||
return fmt.Errorf("artifacts.Upload(%s): %w", m.name, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func uploadArtifact(ctx context.Context, a *config.Artifact, b *bundle.Bundle) error {
|
||||
for i := range a.Files {
|
||||
f := &a.Files[i]
|
||||
if f.NeedsUpload() {
|
||||
filename := path.Base(f.Source)
|
||||
cmdio.LogString(ctx, fmt.Sprintf("artifacts.Upload(%s): Uploading...", filename))
|
||||
remotePath, err := uploadArtifactFile(ctx, f.Source, b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cmdio.LogString(ctx, fmt.Sprintf("artifacts.Upload(%s): Upload succeeded", filename))
|
||||
|
||||
f.RemotePath = remotePath
|
||||
}
|
||||
}
|
||||
|
||||
a.NormalisePaths()
|
||||
return nil
|
||||
}
|
||||
|
||||
// Function to upload artifact file to Workspace
|
||||
func uploadArtifactFile(ctx context.Context, file string, b *bundle.Bundle) (string, error) {
|
||||
raw, err := os.ReadFile(file)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to read %s: %w", file, errors.Unwrap(err))
|
||||
}
|
||||
|
||||
uploadPath, err := getUploadBasePath(b)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
fileHash := sha256.Sum256(raw)
|
||||
remotePath := path.Join(uploadPath, fmt.Sprintf("%x", fileHash), path.Base(file))
|
||||
// Make sure target directory exists.
|
||||
err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, path.Dir(remotePath))
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to create directory for %s: %w", remotePath, err)
|
||||
}
|
||||
|
||||
// Import to workspace.
|
||||
err = b.WorkspaceClient().Workspace.Import(ctx, workspace.Import{
|
||||
Path: remotePath,
|
||||
Overwrite: true,
|
||||
Format: workspace.ImportFormatAuto,
|
||||
Content: base64.StdEncoding.EncodeToString(raw),
|
||||
})
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to import %s: %w", remotePath, err)
|
||||
}
|
||||
|
||||
return remotePath, nil
|
||||
}
|
||||
|
||||
func getUploadBasePath(b *bundle.Bundle) (string, error) {
|
||||
artifactPath := b.Config.Workspace.ArtifactsPath
|
||||
if artifactPath == "" {
|
||||
return "", fmt.Errorf("remote artifact path not configured")
|
||||
}
|
||||
|
||||
return path.Join(artifactPath, ".internal"), nil
|
||||
}
|
|
@ -3,9 +3,9 @@ package artifacts
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/cli/bundle/artifacts/notebook"
|
||||
)
|
||||
|
||||
func BuildAll() bundle.Mutator {
|
||||
|
@ -33,9 +33,23 @@ func (m *build) Apply(ctx context.Context, b *bundle.Bundle) error {
|
|||
return fmt.Errorf("artifact doesn't exist: %s", m.name)
|
||||
}
|
||||
|
||||
if artifact.Notebook != nil {
|
||||
return bundle.Apply(ctx, b, notebook.Build(m.name))
|
||||
if len(artifact.Files) == 0 && artifact.BuildCommand == "" {
|
||||
return fmt.Errorf("artifact %s misconfigured: 'files' or 'build' property is required", m.name)
|
||||
}
|
||||
|
||||
return nil
|
||||
// If artifact file is explicitly defined, skip building the artifact
|
||||
if len(artifact.Files) != 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// If artifact path is not provided, use bundle root dir
|
||||
if artifact.Path == "" {
|
||||
artifact.Path = b.Config.Path
|
||||
}
|
||||
|
||||
if !filepath.IsAbs(artifact.Path) {
|
||||
artifact.Path = filepath.Join(b.Config.Path, artifact.Path)
|
||||
}
|
||||
|
||||
return bundle.Apply(ctx, b, getBuildMutator(artifact.Type, m.name))
|
||||
}
|
||||
|
|
|
@ -1,81 +0,0 @@
|
|||
package notebook
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/databricks-sdk-go/service/workspace"
|
||||
)
|
||||
|
||||
// build is a mutator that validates a notebook artifact and derives its
// language and local/remote paths prior to upload.
type build struct {
	// name is the key of the artifact in the bundle configuration.
	name string
}

// Build returns a build mutator for the named notebook artifact.
func Build(name string) bundle.Mutator {
	return &build{
		name: name,
	}
}
|
||||
|
||||
func (m *build) Name() string {
|
||||
return fmt.Sprintf("notebook.Build(%s)", m.name)
|
||||
}
|
||||
|
||||
// Apply validates the named notebook artifact and fills in its derived
// fields: it detects the language from the file extension, verifies the
// file starts with the Databricks notebook marker, and computes the
// absolute local and remote paths used by the subsequent upload step.
func (m *build) Apply(_ context.Context, b *bundle.Bundle) error {
	a, ok := b.Config.Artifacts[m.name]
	if !ok {
		return fmt.Errorf("artifact doesn't exist: %s", m.name)
	}

	artifact := a.Notebook

	// Check if the filetype is supported.
	switch ext := strings.ToLower(filepath.Ext(artifact.Path)); ext {
	case ".py":
		artifact.Language = workspace.LanguagePython
	case ".scala":
		artifact.Language = workspace.LanguageScala
	case ".sql":
		artifact.Language = workspace.LanguageSql
	default:
		return fmt.Errorf("invalid notebook extension: %s", ext)
	}

	// Open underlying file.
	// NOTE(review): errors.Unwrap may return nil for non-wrapping errors,
	// which would garble the message — confirm os.Open errors always wrap.
	f, err := os.Open(filepath.Join(b.Config.Path, artifact.Path))
	if err != nil {
		return fmt.Errorf("unable to open artifact file %s: %w", artifact.Path, errors.Unwrap(err))
	}
	defer f.Close()

	// Check that the file contains the notebook marker on its first line.
	ok, err = hasMarker(artifact.Language, f)
	if err != nil {
		return fmt.Errorf("unable to read artifact file %s: %s", artifact.Path, errors.Unwrap(err))
	}
	if !ok {
		return fmt.Errorf("notebook marker not found in %s", artifact.Path)
	}

	// Check that an artifact path is defined.
	remotePath := b.Config.Workspace.ArtifactsPath
	if remotePath == "" {
		return fmt.Errorf("remote artifact path not configured")
	}

	// Store absolute paths.
	artifact.LocalPath = filepath.Join(b.Config.Path, artifact.Path)
	artifact.RemotePath = path.Join(remotePath, stripExtension(artifact.Path))
	return nil
}
|
||||
|
||||
// stripExtension returns path with its final extension (as reported by
// filepath.Ext) removed; paths without an extension come back unchanged.
func stripExtension(path string) string {
	return path[:len(path)-len(filepath.Ext(path))]
}
|
|
@ -1,29 +0,0 @@
|
|||
package notebook
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/databricks/databricks-sdk-go/service/workspace"
|
||||
)
|
||||
|
||||
func hasMarker(l workspace.Language, r io.Reader) (bool, error) {
|
||||
scanner := bufio.NewScanner(r)
|
||||
ok := scanner.Scan()
|
||||
if !ok {
|
||||
return false, scanner.Err()
|
||||
}
|
||||
|
||||
line := strings.TrimSpace(scanner.Text())
|
||||
switch l {
|
||||
case workspace.LanguagePython:
|
||||
return line == "# Databricks notebook source", nil
|
||||
case workspace.LanguageScala:
|
||||
return line == "// Databricks notebook source", nil
|
||||
case workspace.LanguageSql:
|
||||
return line == "-- Databricks notebook source", nil
|
||||
default:
|
||||
panic("language not handled: " + l)
|
||||
}
|
||||
}
|
|
@ -1,60 +0,0 @@
|
|||
package notebook
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/databricks-sdk-go/service/workspace"
|
||||
)
|
||||
|
||||
// upload is a mutator that imports a notebook artifact into the workspace.
type upload struct {
	// name is the key of the artifact in the bundle configuration.
	name string
}

// Upload returns an upload mutator for the named notebook artifact.
func Upload(name string) bundle.Mutator {
	return &upload{
		name: name,
	}
}
|
||||
|
||||
func (m *upload) Name() string {
|
||||
return fmt.Sprintf("notebook.Upload(%s)", m.name)
|
||||
}
|
||||
|
||||
// Apply reads the notebook's local file and imports it into the workspace
// at the artifact's remote path, overwriting any existing notebook.
// Assumes a prior build step populated LocalPath, RemotePath and Language.
func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) error {
	a, ok := b.Config.Artifacts[m.name]
	if !ok {
		return fmt.Errorf("artifact doesn't exist: %s", m.name)
	}

	artifact := a.Notebook
	// NOTE(review): errors.Unwrap may return nil for non-wrapping errors —
	// confirm os.ReadFile errors always wrap before relying on this message.
	raw, err := os.ReadFile(artifact.LocalPath)
	if err != nil {
		return fmt.Errorf("unable to read %s: %w", m.name, errors.Unwrap(err))
	}

	// Make sure target directory exists.
	err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, path.Dir(artifact.RemotePath))
	if err != nil {
		return fmt.Errorf("unable to create directory for %s: %w", m.name, err)
	}

	// Import to workspace.
	err = b.WorkspaceClient().Workspace.Import(ctx, workspace.Import{
		Path:      artifact.RemotePath,
		Overwrite: true,
		Format:    workspace.ImportFormatSource,
		Language:  artifact.Language,
		Content:   base64.StdEncoding.EncodeToString(raw),
	})
	if err != nil {
		return fmt.Errorf("unable to import %s: %w", m.name, err)
	}

	return nil
}
|
|
@ -5,7 +5,7 @@ import (
|
|||
"fmt"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/cli/bundle/artifacts/notebook"
|
||||
"github.com/databricks/databricks-sdk-go/service/workspace"
|
||||
)
|
||||
|
||||
func UploadAll() bundle.Mutator {
|
||||
|
@ -15,6 +15,10 @@ func UploadAll() bundle.Mutator {
|
|||
}
|
||||
}
|
||||
|
||||
// CleanUp returns a mutator that recreates the remote artifact upload
// directory, removing anything previously uploaded there.
func CleanUp() bundle.Mutator {
	return &cleanUp{}
}
|
||||
|
||||
// upload uploads the files of a single named artifact to the workspace.
type upload struct {
	// name is the key of the artifact in the bundle configuration.
	name string
}
|
||||
|
@ -33,8 +37,33 @@ func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) error {
|
|||
return fmt.Errorf("artifact doesn't exist: %s", m.name)
|
||||
}
|
||||
|
||||
if artifact.Notebook != nil {
|
||||
return bundle.Apply(ctx, b, notebook.Upload(m.name))
|
||||
if len(artifact.Files) == 0 {
|
||||
return fmt.Errorf("artifact source is not configured: %s", m.name)
|
||||
}
|
||||
|
||||
return bundle.Apply(ctx, b, getUploadMutator(artifact.Type, m.name))
|
||||
}
|
||||
|
||||
// cleanUp recreates the remote upload directory before artifacts are uploaded.
type cleanUp struct{}

// Name returns the display name of this mutator.
func (m *cleanUp) Name() string {
	return "artifacts.CleanUp"
}
|
||||
|
||||
func (m *cleanUp) Apply(ctx context.Context, b *bundle.Bundle) error {
|
||||
uploadPath, err := getUploadBasePath(b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
b.WorkspaceClient().Workspace.Delete(ctx, workspace.Delete{
|
||||
Path: uploadPath,
|
||||
Recursive: true,
|
||||
})
|
||||
|
||||
err = b.WorkspaceClient().Workspace.MkdirsByPath(ctx, uploadPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create directory for %s: %w", uploadPath, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
package whl
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/cli/bundle/config"
|
||||
"github.com/databricks/cli/libs/cmdio"
|
||||
"github.com/databricks/cli/python"
|
||||
)
|
||||
|
||||
// build is a mutator that builds a Python wheel artifact by running its
// configured build command.
type build struct {
	// name is the key of the artifact in the bundle configuration.
	name string
}

// Build returns a wheel build mutator for the named artifact.
func Build(name string) bundle.Mutator {
	return &build{
		name: name,
	}
}
|
||||
|
||||
func (m *build) Name() string {
|
||||
return fmt.Sprintf("artifacts.whl.Build(%s)", m.name)
|
||||
}
|
||||
|
||||
// Apply builds a Python wheel for the named artifact by running its build
// command in the artifact directory, then records every produced .whl file
// in the artifact's Files list so it can be uploaded later.
func (m *build) Apply(ctx context.Context, b *bundle.Bundle) error {
	artifact, ok := b.Config.Artifacts[m.name]
	if !ok {
		return fmt.Errorf("artifact doesn't exist: %s", m.name)
	}

	// TODO: If not set, BuildCommand should be infer prior to this
	// via a mutator so that it can be observable.
	if artifact.BuildCommand == "" {
		return fmt.Errorf("artifacts.whl.Build(%s): missing build property for the artifact", m.name)
	}

	cmdio.LogString(ctx, fmt.Sprintf("artifacts.whl.Build(%s): Building...", m.name))

	dir := artifact.Path

	// Remove stale build output so only freshly built wheels are picked up.
	distPath := filepath.Join(dir, "dist")
	os.RemoveAll(distPath)
	python.CleanupWheelFolder(dir)

	out, err := artifact.Build(ctx)
	if err != nil {
		return fmt.Errorf("artifacts.whl.Build(%s): Failed %w, output: %s", m.name, err, out)
	}
	cmdio.LogString(ctx, fmt.Sprintf("artifacts.whl.Build(%s): Build succeeded", m.name))

	wheels := python.FindFilesWithSuffixInPath(distPath, ".whl")
	if len(wheels) == 0 {
		return fmt.Errorf("artifacts.whl.Build(%s): cannot find built wheel in %s", m.name, dir)
	}
	// Register every built wheel as an artifact file.
	for _, wheel := range wheels {
		artifact.Files = append(artifact.Files, config.ArtifactFile{
			Source: wheel,
		})
	}

	return nil
}
|
|
@ -1,20 +1,76 @@
|
|||
package config
|
||||
|
||||
import "github.com/databricks/databricks-sdk-go/service/workspace"
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||
)
|
||||
|
||||
type ArtifactType string
|
||||
|
||||
const ArtifactPythonWheel ArtifactType = `whl`
|
||||
|
||||
type ArtifactFile struct {
|
||||
Source string `json:"source"`
|
||||
RemotePath string `json:"-" bundle:"readonly"`
|
||||
Libraries []*compute.Library `json:"-" bundle:"readonly"`
|
||||
}
|
||||
|
||||
// Artifact defines a single local code artifact that can be
|
||||
// built/uploaded/referenced in the context of this bundle.
|
||||
type Artifact struct {
|
||||
Notebook *NotebookArtifact `json:"notebook,omitempty"`
|
||||
}
|
||||
Type ArtifactType `json:"type"`
|
||||
|
||||
type NotebookArtifact struct {
|
||||
// The local path to the directory with a root of artifact,
|
||||
// for example, where setup.py is for Python projects
|
||||
Path string `json:"path"`
|
||||
|
||||
// Language is detected during build step.
|
||||
Language workspace.Language `json:"language,omitempty" bundle:"readonly"`
|
||||
|
||||
// Paths are synthesized during build step.
|
||||
LocalPath string `json:"local_path,omitempty" bundle:"readonly"`
|
||||
RemotePath string `json:"remote_path,omitempty" bundle:"readonly"`
|
||||
// The relative or absolute path to the built artifact files
|
||||
// (Python wheel, Java jar and etc) itself
|
||||
Files []ArtifactFile `json:"files"`
|
||||
BuildCommand string `json:"build"`
|
||||
}
|
||||
|
||||
// Build runs the artifact's configured build command in the artifact's
// directory and returns the combined stdout/stderr output. It fails when
// no build command is configured.
func (a *Artifact) Build(ctx context.Context) ([]byte, error) {
	if a.BuildCommand == "" {
		return nil, fmt.Errorf("no build property defined")
	}

	// NOTE(review): splitting on single spaces does not support quoted
	// arguments or multiple consecutive spaces — confirm build commands
	// are always simple space-separated tokens.
	buildParts := strings.Split(a.BuildCommand, " ")
	cmd := exec.CommandContext(ctx, buildParts[0], buildParts[1:]...)
	cmd.Dir = a.Path
	return cmd.CombinedOutput()
}
|
||||
|
||||
func (a *Artifact) NormalisePaths() {
|
||||
for _, f := range a.Files {
|
||||
// If no libraries attached, nothing to normalise, skipping
|
||||
if f.Libraries == nil {
|
||||
continue
|
||||
}
|
||||
|
||||
wsfsBase := "/Workspace"
|
||||
remotePath := path.Join(wsfsBase, f.RemotePath)
|
||||
for i := range f.Libraries {
|
||||
lib := f.Libraries[i]
|
||||
switch a.Type {
|
||||
case ArtifactPythonWheel:
|
||||
lib.Whl = remotePath
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// NeedsUpload determines whether this artifact file needs to be uploaded.
//
// During bundle processing we analyse which library uses which artifact
// file. If an artifact file is used as a library, a reference to that
// library is stored in the file's Libraries field. A non-nil Libraries
// slice therefore means the file is in use and must be uploaded;
// otherwise the file is unused and uploading is skipped.
func (af *ArtifactFile) NeedsUpload() bool {
	return af.Libraries != nil
}
|
||||
|
|
|
@ -0,0 +1,107 @@
|
|||
package libraries
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/cli/bundle/config"
|
||||
"github.com/databricks/cli/libs/cmdio"
|
||||
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||
)
|
||||
|
||||
// match is a mutator that connects job task libraries to the artifact
// files that produce them, marking those files for upload.
type match struct {
}

// MatchWithArtifacts returns a mutator that links task libraries with
// bundle artifact files.
func MatchWithArtifacts() bundle.Mutator {
	return &match{}
}

// Name returns the display name of this mutator.
func (a *match) Name() string {
	return "libraries.MatchWithArtifacts"
}
|
||||
|
||||
func (a *match) Apply(ctx context.Context, b *bundle.Bundle) error {
|
||||
r := b.Config.Resources
|
||||
for k := range b.Config.Resources.Jobs {
|
||||
tasks := r.Jobs[k].JobSettings.Tasks
|
||||
for i := range tasks {
|
||||
task := &tasks[i]
|
||||
if isMissingRequiredLibraries(task) {
|
||||
return fmt.Errorf("task '%s' is missing required libraries. Please include your package code in task libraries block", task.TaskKey)
|
||||
}
|
||||
for j := range task.Libraries {
|
||||
lib := &task.Libraries[j]
|
||||
err := findArtifactsAndMarkForUpload(ctx, lib, b)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func isMissingRequiredLibraries(task *jobs.Task) bool {
|
||||
if task.Libraries != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
return task.PythonWheelTask != nil || task.SparkJarTask != nil
|
||||
}
|
||||
|
||||
func findLibraryMatches(lib *compute.Library, b *bundle.Bundle) ([]string, error) {
|
||||
path := libPath(lib)
|
||||
if path == "" {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
fullPath := filepath.Join(b.Config.Path, path)
|
||||
return filepath.Glob(fullPath)
|
||||
}
|
||||
|
||||
// findArtifactsAndMarkForUpload resolves the library's local path against
// the artifact files defined in the bundle. Matched files get the library
// appended to their Libraries list (which marks them for upload);
// unmatched paths are logged and skipped so the user can upload them
// manually.
func findArtifactsAndMarkForUpload(ctx context.Context, lib *compute.Library, b *bundle.Bundle) error {
	matches, err := findLibraryMatches(lib, b)
	if err != nil {
		return err
	}

	for _, match := range matches {
		af, err := findArtifactFileByLocalPath(match, b)
		if err != nil {
			// Best effort: an unmatched library is reported, not fatal.
			cmdio.LogString(ctx, fmt.Sprintf("%s. Skipping %s. In order to use the library upload it manually", err.Error(), match))
		} else {
			af.Libraries = append(af.Libraries, lib)
		}
	}

	return nil
}
|
||||
|
||||
func findArtifactFileByLocalPath(path string, b *bundle.Bundle) (*config.ArtifactFile, error) {
|
||||
for _, a := range b.Config.Artifacts {
|
||||
for k := range a.Files {
|
||||
if a.Files[k].Source == path {
|
||||
return &a.Files[k], nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("artifact file is not found for path %s", path)
|
||||
}
|
||||
|
||||
func libPath(library *compute.Library) string {
|
||||
if library.Whl != "" {
|
||||
return library.Whl
|
||||
}
|
||||
if library.Jar != "" {
|
||||
return library.Jar
|
||||
}
|
||||
if library.Egg != "" {
|
||||
return library.Egg
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
|
@ -6,6 +6,7 @@ import (
|
|||
"github.com/databricks/cli/bundle/deploy/files"
|
||||
"github.com/databricks/cli/bundle/deploy/lock"
|
||||
"github.com/databricks/cli/bundle/deploy/terraform"
|
||||
"github.com/databricks/cli/bundle/libraries"
|
||||
)
|
||||
|
||||
// The deploy phase deploys artifacts and resources.
|
||||
|
@ -15,6 +16,8 @@ func Deploy() bundle.Mutator {
|
|||
bundle.Defer(
|
||||
bundle.Seq(
|
||||
files.Upload(),
|
||||
libraries.MatchWithArtifacts(),
|
||||
artifacts.CleanUp(),
|
||||
artifacts.UploadAll(),
|
||||
terraform.Interpolate(),
|
||||
terraform.Write(),
|
||||
|
|
|
@ -0,0 +1,3 @@
|
|||
build/
|
||||
*.egg-info
|
||||
.databricks
|
|
@ -0,0 +1,19 @@
|
|||
bundle:
|
||||
name: python-wheel
|
||||
|
||||
artifacts:
|
||||
my_test_code:
|
||||
type: whl
|
||||
path: "./my_test_code"
|
||||
build: "/usr/local/bin/python setup.py bdist_wheel"
|
||||
|
||||
resources:
|
||||
jobs:
|
||||
test_job:
|
||||
name: "[${bundle.environment}] My Wheel Job"
|
||||
tasks:
|
||||
- task_key: TestTask
|
||||
existing_cluster_id: "0717-132531-5opeqon1"
|
||||
python_wheel_task:
|
||||
package_name: "my_test_code"
|
||||
entry_point: "run"
|
|
@ -0,0 +1,15 @@
|
|||
# Packaging configuration for the test wheel used by the bundle
# integration tests; metadata comes from the src package.
from setuptools import setup, find_packages

import src

setup(
    name="my_test_code",
    version=src.__version__,
    author=src.__author__,
    url="https://databricks.com",
    author_email="john.doe@databricks.com",
    description="my test wheel",
    packages=find_packages(include=["src"]),
    # Exposes `run` as an entry point mapped to src.__main__:main.
    entry_points={"group_1": "run=src.__main__:main"},
    install_requires=["setuptools"],
)
|
|
@ -0,0 +1,2 @@
|
|||
# Package metadata consumed by setup.py.
__version__ = "0.0.1"
__author__ = "Databricks"
|
|
@ -0,0 +1,16 @@
|
|||
"""
|
||||
The entry point of the Python Wheel
|
||||
"""
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
def main():
    # This method will print the provided arguments
    # (smoke-test behaviour for the wheel entry point).
    print('Hello from my func')
    print('Got arguments:')
    print(sys.argv)


if __name__ == '__main__':
    main()
|
|
@ -0,0 +1,26 @@
|
|||
package bundle
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"testing"
|
||||
|
||||
"github.com/databricks/cli/bundle"
|
||||
"github.com/databricks/cli/bundle/phases"
|
||||
"github.com/databricks/cli/internal"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestAccBundlePythonWheelBuild verifies that the build phase of a bundle
// containing a Python wheel artifact produces the expected wheel file on
// disk. Skipped unless CLOUD_ENV is set (acceptance test).
func TestAccBundlePythonWheelBuild(t *testing.T) {
	t.Log(internal.GetEnvOrSkipTest(t, "CLOUD_ENV"))

	b, err := bundle.Load("./python_wheel")
	require.NoError(t, err)

	m := phases.Build()
	err = m.Apply(context.Background(), b)
	require.NoError(t, err)

	// NOTE(review): the expected filename hardcodes the py2 tag — confirm
	// this matches the interpreter used to build the wheel in CI.
	_, err = os.Stat("./python_wheel/my_test_code/dist/my_test_code-0.0.1-py2-none-any.whl")
	require.NoError(t, err)
}
|
|
@ -0,0 +1,48 @@
|
|||
package python
|
||||
|
||||
// TODO: move this package into the libs
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/databricks/cli/libs/log"
|
||||
)
|
||||
|
||||
func CleanupWheelFolder(dir string) {
|
||||
// there or not there - we don't care
|
||||
os.RemoveAll(path.Join(dir, "__pycache__"))
|
||||
os.RemoveAll(path.Join(dir, "build"))
|
||||
eggInfo := FindFilesWithSuffixInPath(dir, ".egg-info")
|
||||
if len(eggInfo) == 0 {
|
||||
return
|
||||
}
|
||||
for _, f := range eggInfo {
|
||||
os.RemoveAll(f)
|
||||
}
|
||||
}
|
||||
|
||||
func FindFilesWithSuffixInPath(dir, suffix string) []string {
|
||||
f, err := os.Open(dir)
|
||||
if err != nil {
|
||||
log.Debugf(context.Background(), "open dir %s: %s", dir, err)
|
||||
return nil
|
||||
}
|
||||
entries, err := f.ReadDir(0)
|
||||
if err != nil {
|
||||
log.Debugf(context.Background(), "read dir %s: %s", dir, err)
|
||||
// todo: log
|
||||
return nil
|
||||
}
|
||||
|
||||
files := make([]string, 0)
|
||||
for _, child := range entries {
|
||||
if !strings.HasSuffix(child.Name(), suffix) {
|
||||
continue
|
||||
}
|
||||
files = append(files, path.Join(dir, child.Name()))
|
||||
}
|
||||
return files
|
||||
}
|
|
@ -6,7 +6,6 @@ import (
|
|||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"strings"
|
||||
|
||||
"github.com/databricks/cli/libs/log"
|
||||
"github.com/databricks/databricks-sdk-go"
|
||||
|
@ -18,7 +17,7 @@ func BuildWheel(ctx context.Context, dir string) (string, error) {
|
|||
// remove previous dist leak
|
||||
os.RemoveAll("dist")
|
||||
// remove all other irrelevant traces
|
||||
silentlyCleanupWheelFolder(".")
|
||||
CleanupWheelFolder(".")
|
||||
// call simple wheel builder. we may need to pip install wheel as well
|
||||
out, err := Py(ctx, "setup.py", "bdist_wheel")
|
||||
if err != nil {
|
||||
|
@ -27,13 +26,16 @@ func BuildWheel(ctx context.Context, dir string) (string, error) {
|
|||
log.Debugf(ctx, "Built wheel: %s", out)
|
||||
|
||||
// and cleanup afterwards
|
||||
silentlyCleanupWheelFolder(".")
|
||||
CleanupWheelFolder(".")
|
||||
|
||||
wheel := silentChildWithSuffix("dist", ".whl")
|
||||
if wheel == "" {
|
||||
wheels := FindFilesWithSuffixInPath("dist", ".whl")
|
||||
if len(wheels) == 0 {
|
||||
return "", fmt.Errorf("cannot find built wheel in %s", dir)
|
||||
}
|
||||
return path.Join(dir, wheel), nil
|
||||
if len(wheels) != 1 {
|
||||
return "", fmt.Errorf("more than 1 wheel file found in %s", dir)
|
||||
}
|
||||
return path.Join(dir, wheels[0]), nil
|
||||
}
|
||||
|
||||
const DBFSWheelLocation = "dbfs:/FileStore/wheels/simple"
|
||||
|
@ -82,38 +84,6 @@ func UploadWheelToDBFSWithPEP503(ctx context.Context, dir string) (string, error
|
|||
return dbfsLoc, err
|
||||
}
|
||||
|
||||
// silentlyCleanupWheelFolder removes Python build residue (__pycache__,
// build, and the first *.egg-info entry) from dir, ignoring all errors.
func silentlyCleanupWheelFolder(dir string) {
	// there or not there - we don't care
	os.RemoveAll(path.Join(dir, "__pycache__"))
	os.RemoveAll(path.Join(dir, "build"))
	eggInfo := silentChildWithSuffix(dir, ".egg-info")
	if eggInfo == "" {
		return
	}
	os.RemoveAll(eggInfo)
}
|
||||
|
||||
func silentChildWithSuffix(dir, suffix string) string {
|
||||
f, err := os.Open(dir)
|
||||
if err != nil {
|
||||
log.Debugf(context.Background(), "open dir %s: %s", dir, err)
|
||||
return ""
|
||||
}
|
||||
entries, err := f.ReadDir(0)
|
||||
if err != nil {
|
||||
log.Debugf(context.Background(), "read dir %s: %s", dir, err)
|
||||
// todo: log
|
||||
return ""
|
||||
}
|
||||
for _, child := range entries {
|
||||
if !strings.HasSuffix(child.Name(), suffix) {
|
||||
continue
|
||||
}
|
||||
return path.Join(dir, child.Name())
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func chdirAndBack(dir string) func() {
|
||||
wd, _ := os.Getwd()
|
||||
os.Chdir(dir)
|
||||
|
|
Loading…
Reference in New Issue