mirror of https://github.com/databricks/cli.git
Refactor library to artifact matching to not use pointers (#1172)
## Changes

The previous approach to matching libraries with artifacts was:

1. Iterate over all libraries in all job tasks
2. Find references to local libraries
3. Store a pointer to `compute.Library` in the matching artifact file to signal it should be uploaded

This breaks down when introducing #1098 because we can no longer track unexported state across mutators. The approach in this PR performs the path matching twice: once in the matching mutator, where we check if each referenced file has an artifacts section, and once during artifact upload, to rewrite the library path from a local file reference to an absolute Databricks path.

## Tests

Integration tests pass.
This commit is contained in:
parent
cb3ad737f1
commit
33c446dadd
|
@ -12,6 +12,7 @@ import (
|
||||||
"github.com/databricks/cli/bundle"
|
"github.com/databricks/cli/bundle"
|
||||||
"github.com/databricks/cli/bundle/artifacts/whl"
|
"github.com/databricks/cli/bundle/artifacts/whl"
|
||||||
"github.com/databricks/cli/bundle/config"
|
"github.com/databricks/cli/bundle/config"
|
||||||
|
"github.com/databricks/cli/bundle/libraries"
|
||||||
"github.com/databricks/cli/libs/cmdio"
|
"github.com/databricks/cli/libs/cmdio"
|
||||||
"github.com/databricks/cli/libs/filer"
|
"github.com/databricks/cli/libs/filer"
|
||||||
"github.com/databricks/cli/libs/log"
|
"github.com/databricks/cli/libs/log"
|
||||||
|
@ -106,7 +107,7 @@ func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
err = uploadArtifact(ctx, artifact, uploadPath, client)
|
err = uploadArtifact(ctx, b, artifact, uploadPath, client)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("upload for %s failed, error: %w", m.name, err)
|
return fmt.Errorf("upload for %s failed, error: %w", m.name, err)
|
||||||
}
|
}
|
||||||
|
@ -114,23 +115,45 @@ func (m *basicUpload) Apply(ctx context.Context, b *bundle.Bundle) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func uploadArtifact(ctx context.Context, a *config.Artifact, uploadPath string, client filer.Filer) error {
|
func uploadArtifact(ctx context.Context, b *bundle.Bundle, a *config.Artifact, uploadPath string, client filer.Filer) error {
|
||||||
|
filesToLibraries := libraries.MapFilesToTaskLibraries(ctx, b)
|
||||||
|
|
||||||
for i := range a.Files {
|
for i := range a.Files {
|
||||||
f := &a.Files[i]
|
f := &a.Files[i]
|
||||||
if f.NeedsUpload() {
|
|
||||||
filename := filepath.Base(f.Source)
|
|
||||||
cmdio.LogString(ctx, fmt.Sprintf("Uploading %s...", filename))
|
|
||||||
|
|
||||||
err := uploadArtifactFile(ctx, f.Source, client)
|
// Lookup all tasks that reference this file.
|
||||||
if err != nil {
|
libs, ok := filesToLibraries[f.Source]
|
||||||
return err
|
if !ok {
|
||||||
|
log.Debugf(ctx, "No tasks reference %s. Skipping upload.", f.Source)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
filename := filepath.Base(f.Source)
|
||||||
|
cmdio.LogString(ctx, fmt.Sprintf("Uploading %s...", filename))
|
||||||
|
|
||||||
|
err := uploadArtifactFile(ctx, f.Source, client)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Infof(ctx, "Upload succeeded")
|
||||||
|
f.RemotePath = path.Join(uploadPath, filepath.Base(f.Source))
|
||||||
|
|
||||||
|
// Update all tasks that reference this file.
|
||||||
|
for _, lib := range libs {
|
||||||
|
wsfsBase := "/Workspace"
|
||||||
|
remotePath := path.Join(wsfsBase, f.RemotePath)
|
||||||
|
if lib.Whl != "" {
|
||||||
|
lib.Whl = remotePath
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if lib.Jar != "" {
|
||||||
|
lib.Jar = remotePath
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
log.Infof(ctx, "Upload succeeded")
|
|
||||||
f.RemotePath = path.Join(uploadPath, filepath.Base(f.Source))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
a.NormalisePaths()
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -3,11 +3,9 @@ package config
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"path"
|
|
||||||
|
|
||||||
"github.com/databricks/cli/bundle/config/paths"
|
"github.com/databricks/cli/bundle/config/paths"
|
||||||
"github.com/databricks/cli/libs/exec"
|
"github.com/databricks/cli/libs/exec"
|
||||||
"github.com/databricks/databricks-sdk-go/service/compute"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Artifacts map[string]*Artifact
|
type Artifacts map[string]*Artifact
|
||||||
|
@ -23,9 +21,8 @@ type ArtifactType string
|
||||||
const ArtifactPythonWheel ArtifactType = `whl`
|
const ArtifactPythonWheel ArtifactType = `whl`
|
||||||
|
|
||||||
type ArtifactFile struct {
|
type ArtifactFile struct {
|
||||||
Source string `json:"source"`
|
Source string `json:"source"`
|
||||||
RemotePath string `json:"-" bundle:"readonly"`
|
RemotePath string `json:"remote_path" bundle:"readonly"`
|
||||||
Libraries []*compute.Library `json:"-" bundle:"readonly"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Artifact defines a single local code artifact that can be
|
// Artifact defines a single local code artifact that can be
|
||||||
|
@ -65,36 +62,3 @@ func (a *Artifact) Build(ctx context.Context) ([]byte, error) {
|
||||||
}
|
}
|
||||||
return e.Exec(ctx, a.BuildCommand)
|
return e.Exec(ctx, a.BuildCommand)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *Artifact) NormalisePaths() {
|
|
||||||
for _, f := range a.Files {
|
|
||||||
// If no libraries attached, nothing to normalise, skipping
|
|
||||||
if f.Libraries == nil {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
wsfsBase := "/Workspace"
|
|
||||||
remotePath := path.Join(wsfsBase, f.RemotePath)
|
|
||||||
for i := range f.Libraries {
|
|
||||||
lib := f.Libraries[i]
|
|
||||||
if lib.Whl != "" {
|
|
||||||
lib.Whl = remotePath
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if lib.Jar != "" {
|
|
||||||
lib.Jar = remotePath
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// This function determines if artifact files needs to be uploaded.
|
|
||||||
// During the bundle processing we analyse which library uses which artifact file.
|
|
||||||
// If artifact file is used as a library, we store the reference to this library in artifact file Libraries field.
|
|
||||||
// If artifact file has libraries it's been used in, it means than we need to upload this file.
|
|
||||||
// Otherwise this artifact file is not used and we skip uploading
|
|
||||||
func (af *ArtifactFile) NeedsUpload() bool {
|
|
||||||
return af.Libraries != nil
|
|
||||||
}
|
|
||||||
|
|
|
@ -0,0 +1,16 @@
|
||||||
|
package libraries
|
||||||
|
|
||||||
|
import "github.com/databricks/databricks-sdk-go/service/compute"
|
||||||
|
|
||||||
|
func libraryPath(library *compute.Library) string {
|
||||||
|
if library.Whl != "" {
|
||||||
|
return library.Whl
|
||||||
|
}
|
||||||
|
if library.Jar != "" {
|
||||||
|
return library.Jar
|
||||||
|
}
|
||||||
|
if library.Egg != "" {
|
||||||
|
return library.Egg
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
|
@ -0,0 +1,17 @@
|
||||||
|
package libraries
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestLibraryPath(t *testing.T) {
|
||||||
|
path := "/some/path"
|
||||||
|
|
||||||
|
assert.Equal(t, path, libraryPath(&compute.Library{Whl: path}))
|
||||||
|
assert.Equal(t, path, libraryPath(&compute.Library{Jar: path}))
|
||||||
|
assert.Equal(t, path, libraryPath(&compute.Library{Egg: path}))
|
||||||
|
assert.Equal(t, "", libraryPath(&compute.Library{}))
|
||||||
|
}
|
|
@ -3,46 +3,16 @@ package libraries
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/url"
|
|
||||||
"path"
|
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/databricks/cli/bundle"
|
"github.com/databricks/cli/bundle"
|
||||||
"github.com/databricks/cli/bundle/config"
|
"github.com/databricks/cli/bundle/config"
|
||||||
"github.com/databricks/cli/libs/cmdio"
|
"github.com/databricks/cli/libs/cmdio"
|
||||||
|
"github.com/databricks/cli/libs/log"
|
||||||
"github.com/databricks/databricks-sdk-go/service/compute"
|
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||||
"github.com/databricks/databricks-sdk-go/service/jobs"
|
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||||
)
|
)
|
||||||
|
|
||||||
type match struct {
|
|
||||||
}
|
|
||||||
|
|
||||||
func MatchWithArtifacts() bundle.Mutator {
|
|
||||||
return &match{}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *match) Name() string {
|
|
||||||
return "libraries.MatchWithArtifacts"
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *match) Apply(ctx context.Context, b *bundle.Bundle) error {
|
|
||||||
tasks := findAllTasks(b)
|
|
||||||
for _, task := range tasks {
|
|
||||||
if isMissingRequiredLibraries(task) {
|
|
||||||
return fmt.Errorf("task '%s' is missing required libraries. Please include your package code in task libraries block", task.TaskKey)
|
|
||||||
}
|
|
||||||
for j := range task.Libraries {
|
|
||||||
lib := &task.Libraries[j]
|
|
||||||
err := findArtifactsAndMarkForUpload(ctx, lib, b)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func findAllTasks(b *bundle.Bundle) []*jobs.Task {
|
func findAllTasks(b *bundle.Bundle) []*jobs.Task {
|
||||||
r := b.Config.Resources
|
r := b.Config.Resources
|
||||||
result := make([]*jobs.Task, 0)
|
result := make([]*jobs.Task, 0)
|
||||||
|
@ -71,7 +41,7 @@ func FindAllWheelTasksWithLocalLibraries(b *bundle.Bundle) []*jobs.Task {
|
||||||
|
|
||||||
func IsTaskWithLocalLibraries(task *jobs.Task) bool {
|
func IsTaskWithLocalLibraries(task *jobs.Task) bool {
|
||||||
for _, l := range task.Libraries {
|
for _, l := range task.Libraries {
|
||||||
if isLocalLibrary(&l) {
|
if IsLocalLibrary(&l) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -81,8 +51,7 @@ func IsTaskWithLocalLibraries(task *jobs.Task) bool {
|
||||||
|
|
||||||
func IsTaskWithWorkspaceLibraries(task *jobs.Task) bool {
|
func IsTaskWithWorkspaceLibraries(task *jobs.Task) bool {
|
||||||
for _, l := range task.Libraries {
|
for _, l := range task.Libraries {
|
||||||
path := libPath(&l)
|
if IsWorkspaceLibrary(&l) {
|
||||||
if isWorkspacePath(path) {
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -90,16 +59,8 @@ func IsTaskWithWorkspaceLibraries(task *jobs.Task) bool {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func isMissingRequiredLibraries(task *jobs.Task) bool {
|
|
||||||
if task.Libraries != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return task.PythonWheelTask != nil || task.SparkJarTask != nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func findLibraryMatches(lib *compute.Library, b *bundle.Bundle) ([]string, error) {
|
func findLibraryMatches(lib *compute.Library, b *bundle.Bundle) ([]string, error) {
|
||||||
path := libPath(lib)
|
path := libraryPath(lib)
|
||||||
if path == "" {
|
if path == "" {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
@ -108,26 +69,27 @@ func findLibraryMatches(lib *compute.Library, b *bundle.Bundle) ([]string, error
|
||||||
return filepath.Glob(fullPath)
|
return filepath.Glob(fullPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
func findArtifactsAndMarkForUpload(ctx context.Context, lib *compute.Library, b *bundle.Bundle) error {
|
func findArtifactFiles(ctx context.Context, lib *compute.Library, b *bundle.Bundle) ([]*config.ArtifactFile, error) {
|
||||||
matches, err := findLibraryMatches(lib, b)
|
matches, err := findLibraryMatches(lib, b)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(matches) == 0 && isLocalLibrary(lib) {
|
if len(matches) == 0 && IsLocalLibrary(lib) {
|
||||||
return fmt.Errorf("file %s is referenced in libraries section but doesn't exist on the local file system", libPath(lib))
|
return nil, fmt.Errorf("file %s is referenced in libraries section but doesn't exist on the local file system", libraryPath(lib))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var out []*config.ArtifactFile
|
||||||
for _, match := range matches {
|
for _, match := range matches {
|
||||||
af, err := findArtifactFileByLocalPath(match, b)
|
af, err := findArtifactFileByLocalPath(match, b)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
cmdio.LogString(ctx, fmt.Sprintf("%s. Skipping uploading. In order to use the define 'artifacts' section", err.Error()))
|
cmdio.LogString(ctx, fmt.Sprintf("%s. Skipping uploading. In order to use the define 'artifacts' section", err.Error()))
|
||||||
} else {
|
} else {
|
||||||
af.Libraries = append(af.Libraries, lib)
|
out = append(out, af)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return out, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func findArtifactFileByLocalPath(path string, b *bundle.Bundle) (*config.ArtifactFile, error) {
|
func findArtifactFileByLocalPath(path string, b *bundle.Bundle) (*config.ArtifactFile, error) {
|
||||||
|
@ -142,67 +104,27 @@ func findArtifactFileByLocalPath(path string, b *bundle.Bundle) (*config.Artifac
|
||||||
return nil, fmt.Errorf("artifact section is not defined for file at %s", path)
|
return nil, fmt.Errorf("artifact section is not defined for file at %s", path)
|
||||||
}
|
}
|
||||||
|
|
||||||
func libPath(library *compute.Library) string {
|
func MapFilesToTaskLibraries(ctx context.Context, b *bundle.Bundle) map[string][]*compute.Library {
|
||||||
if library.Whl != "" {
|
tasks := findAllTasks(b)
|
||||||
return library.Whl
|
out := make(map[string][]*compute.Library)
|
||||||
}
|
for _, task := range tasks {
|
||||||
if library.Jar != "" {
|
for j := range task.Libraries {
|
||||||
return library.Jar
|
lib := &task.Libraries[j]
|
||||||
}
|
if !IsLocalLibrary(lib) {
|
||||||
if library.Egg != "" {
|
continue
|
||||||
return library.Egg
|
}
|
||||||
|
|
||||||
|
matches, err := findLibraryMatches(lib, b)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf(ctx, "Error matching library to files: %s", err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, match := range matches {
|
||||||
|
out[match] = append(out[match], lib)
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return ""
|
return out
|
||||||
}
|
|
||||||
|
|
||||||
func isLocalLibrary(library *compute.Library) bool {
|
|
||||||
path := libPath(library)
|
|
||||||
if path == "" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return IsLocalPath(path)
|
|
||||||
}
|
|
||||||
|
|
||||||
func IsLocalPath(path string) bool {
|
|
||||||
if isExplicitFileScheme(path) {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
if isRemoteStorageScheme(path) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
return !isAbsoluteRemotePath(path)
|
|
||||||
}
|
|
||||||
|
|
||||||
func isExplicitFileScheme(path string) bool {
|
|
||||||
return strings.HasPrefix(path, "file://")
|
|
||||||
}
|
|
||||||
|
|
||||||
func isRemoteStorageScheme(path string) bool {
|
|
||||||
url, err := url.Parse(path)
|
|
||||||
if err != nil {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
if url.Scheme == "" {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the path starts with scheme:/ format, it's a correct remote storage scheme
|
|
||||||
return strings.HasPrefix(path, url.Scheme+":/")
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func isWorkspacePath(path string) bool {
|
|
||||||
return strings.HasPrefix(path, "/Workspace/") ||
|
|
||||||
strings.HasPrefix(path, "/Users/") ||
|
|
||||||
strings.HasPrefix(path, "/Shared/")
|
|
||||||
}
|
|
||||||
|
|
||||||
func isAbsoluteRemotePath(p string) bool {
|
|
||||||
// If path for library starts with /, it's a remote absolute path
|
|
||||||
return path.IsAbs(p)
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,31 +1,88 @@
|
||||||
package libraries
|
package libraries
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"context"
|
||||||
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/databricks/cli/bundle"
|
||||||
|
"github.com/databricks/cli/bundle/config"
|
||||||
|
"github.com/databricks/cli/bundle/config/resources"
|
||||||
"github.com/databricks/databricks-sdk-go/service/compute"
|
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
)
|
)
|
||||||
|
|
||||||
var testCases map[string]bool = map[string]bool{
|
func TestMapFilesToTaskLibrariesNoGlob(t *testing.T) {
|
||||||
"./some/local/path": true,
|
b := &bundle.Bundle{
|
||||||
"/some/full/path": false,
|
Config: config.Root{
|
||||||
"/Workspace/path/to/package": false,
|
Path: "testdata",
|
||||||
"/Users/path/to/package": false,
|
Resources: config.Resources{
|
||||||
"file://path/to/package": true,
|
Jobs: map[string]*resources.Job{
|
||||||
"C:\\path\\to\\package": true,
|
"job1": {
|
||||||
"dbfs://path/to/package": false,
|
JobSettings: &jobs.JobSettings{
|
||||||
"dbfs:/path/to/package": false,
|
Tasks: []jobs.Task{
|
||||||
"s3://path/to/package": false,
|
{
|
||||||
"abfss://path/to/package": false,
|
Libraries: []compute.Library{
|
||||||
}
|
{
|
||||||
|
Whl: "library1",
|
||||||
func TestIsLocalLbrary(t *testing.T) {
|
},
|
||||||
for p, result := range testCases {
|
{
|
||||||
lib := compute.Library{
|
Whl: "library2",
|
||||||
Whl: p,
|
},
|
||||||
}
|
{
|
||||||
require.Equal(t, result, isLocalLibrary(&lib), fmt.Sprintf("isLocalLibrary must return %t for path %s ", result, p))
|
Whl: "/absolute/path/in/workspace/library3",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Libraries: []compute.Library{
|
||||||
|
{
|
||||||
|
Whl: "library1",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Whl: "library2",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"job2": {
|
||||||
|
JobSettings: &jobs.JobSettings{
|
||||||
|
Tasks: []jobs.Task{
|
||||||
|
{
|
||||||
|
Libraries: []compute.Library{
|
||||||
|
{
|
||||||
|
Whl: "library1",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Whl: "library2",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
out := MapFilesToTaskLibraries(context.Background(), b)
|
||||||
|
assert.Len(t, out, 2)
|
||||||
|
|
||||||
|
// Pointer equality for "library1"
|
||||||
|
assert.Equal(t, []*compute.Library{
|
||||||
|
&b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].Libraries[0],
|
||||||
|
&b.Config.Resources.Jobs["job1"].JobSettings.Tasks[1].Libraries[0],
|
||||||
|
&b.Config.Resources.Jobs["job2"].JobSettings.Tasks[0].Libraries[0],
|
||||||
|
}, out[filepath.Clean("testdata/library1")])
|
||||||
|
|
||||||
|
// Pointer equality for "library2"
|
||||||
|
assert.Equal(t, []*compute.Library{
|
||||||
|
&b.Config.Resources.Jobs["job1"].JobSettings.Tasks[0].Libraries[1],
|
||||||
|
&b.Config.Resources.Jobs["job1"].JobSettings.Tasks[1].Libraries[1],
|
||||||
|
&b.Config.Resources.Jobs["job2"].JobSettings.Tasks[0].Libraries[1],
|
||||||
|
}, out[filepath.Clean("testdata/library2")])
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
package libraries
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/url"
|
||||||
|
"path"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||||
|
)
|
||||||
|
|
||||||
|
// IsLocalPath returns true if the specified path indicates that
|
||||||
|
// it should be interpreted as a path on the local file system.
|
||||||
|
//
|
||||||
|
// The following paths are considered local:
|
||||||
|
//
|
||||||
|
// - myfile.txt
|
||||||
|
// - ./myfile.txt
|
||||||
|
// - ../myfile.txt
|
||||||
|
// - file:///foo/bar/myfile.txt
|
||||||
|
//
|
||||||
|
// The following paths are considered remote:
|
||||||
|
//
|
||||||
|
// - dbfs:/mnt/myfile.txt
|
||||||
|
// - s3:/mybucket/myfile.txt
|
||||||
|
// - /Users/jane@doe.com/myfile.txt
|
||||||
|
func IsLocalPath(p string) bool {
|
||||||
|
// If the path has the explicit file scheme, it's a local path.
|
||||||
|
if strings.HasPrefix(p, "file://") {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the path has another scheme, it's a remote path.
|
||||||
|
if isRemoteStorageScheme(p) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// If path starts with /, it's a remote absolute path
|
||||||
|
return !path.IsAbs(p)
|
||||||
|
}
|
||||||
|
|
||||||
|
// isRemoteStorageScheme reports whether path starts with a URL scheme
// followed by ":/" (for example "dbfs:/..." or "s3://...").
//
// It returns false when path cannot be parsed as a URL or when it has no
// scheme. The "file" scheme is not special-cased here; IsLocalPath checks
// for the "file://" prefix before calling this function.
func isRemoteStorageScheme(path string) bool {
	// The parsed value is named u (not url) so that it does not shadow the
	// imported net/url package for the rest of the function.
	u, err := url.Parse(path)
	if err != nil || u.Scheme == "" {
		return false
	}

	// Require the "scheme:/" form. A scheme-like prefix that is not followed
	// by a forward slash, such as the drive letter in `C:\path\to\package`,
	// is not considered a remote storage scheme.
	return strings.HasPrefix(path, u.Scheme+":/")
}
|
||||||
|
|
||||||
|
// IsLocalLibrary returns true if the specified library refers to a local path.
|
||||||
|
func IsLocalLibrary(library *compute.Library) bool {
|
||||||
|
path := libraryPath(library)
|
||||||
|
if path == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return IsLocalPath(path)
|
||||||
|
}
|
|
@ -0,0 +1,43 @@
|
||||||
|
package libraries
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestIsLocalPath(t *testing.T) {
|
||||||
|
// Relative paths, paths with the file scheme, and Windows paths.
|
||||||
|
assert.True(t, IsLocalPath("./some/local/path"))
|
||||||
|
assert.True(t, IsLocalPath("file://path/to/package"))
|
||||||
|
assert.True(t, IsLocalPath("C:\\path\\to\\package"))
|
||||||
|
assert.True(t, IsLocalPath("myfile.txt"))
|
||||||
|
assert.True(t, IsLocalPath("./myfile.txt"))
|
||||||
|
assert.True(t, IsLocalPath("../myfile.txt"))
|
||||||
|
assert.True(t, IsLocalPath("file:///foo/bar/myfile.txt"))
|
||||||
|
|
||||||
|
// Absolute paths.
|
||||||
|
assert.False(t, IsLocalPath("/some/full/path"))
|
||||||
|
assert.False(t, IsLocalPath("/Workspace/path/to/package"))
|
||||||
|
assert.False(t, IsLocalPath("/Users/path/to/package"))
|
||||||
|
|
||||||
|
// Paths with schemes.
|
||||||
|
assert.False(t, IsLocalPath("dbfs://path/to/package"))
|
||||||
|
assert.False(t, IsLocalPath("dbfs:/path/to/package"))
|
||||||
|
assert.False(t, IsLocalPath("s3://path/to/package"))
|
||||||
|
assert.False(t, IsLocalPath("abfss://path/to/package"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsLocalLibrary(t *testing.T) {
|
||||||
|
// Local paths.
|
||||||
|
assert.True(t, IsLocalLibrary(&compute.Library{Whl: "./file.whl"}))
|
||||||
|
assert.True(t, IsLocalLibrary(&compute.Library{Jar: "../target/some.jar"}))
|
||||||
|
|
||||||
|
// Non-local paths.
|
||||||
|
assert.False(t, IsLocalLibrary(&compute.Library{Whl: "/Workspace/path/to/file.whl"}))
|
||||||
|
assert.False(t, IsLocalLibrary(&compute.Library{Jar: "s3:/bucket/path/some.jar"}))
|
||||||
|
|
||||||
|
// Empty.
|
||||||
|
assert.False(t, IsLocalLibrary(&compute.Library{}))
|
||||||
|
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
package libraries
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/databricks/cli/bundle"
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||||
|
)
|
||||||
|
|
||||||
|
type match struct {
|
||||||
|
}
|
||||||
|
|
||||||
|
func MatchWithArtifacts() bundle.Mutator {
|
||||||
|
return &match{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *match) Name() string {
|
||||||
|
return "libraries.MatchWithArtifacts"
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *match) Apply(ctx context.Context, b *bundle.Bundle) error {
|
||||||
|
tasks := findAllTasks(b)
|
||||||
|
for _, task := range tasks {
|
||||||
|
if isMissingRequiredLibraries(task) {
|
||||||
|
return fmt.Errorf("task '%s' is missing required libraries. Please include your package code in task libraries block", task.TaskKey)
|
||||||
|
}
|
||||||
|
for j := range task.Libraries {
|
||||||
|
lib := &task.Libraries[j]
|
||||||
|
_, err := findArtifactFiles(ctx, lib, b)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func isMissingRequiredLibraries(task *jobs.Task) bool {
|
||||||
|
if task.Libraries != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return task.PythonWheelTask != nil || task.SparkJarTask != nil
|
||||||
|
}
|
|
@ -0,0 +1 @@
|
||||||
|
package libraries
|
|
@ -0,0 +1,38 @@
|
||||||
|
package libraries
|
||||||
|
|
||||||
|
import (
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||||
|
)
|
||||||
|
|
||||||
|
// IsWorkspacePath reports whether path should be interpreted as a
// Databricks Workspace path, i.e. whether it starts with one of the
// prefixes "/Workspace/", "/Users/", or "/Shared/".
//
// Workspace paths:
//
//   - /Workspace/Users/jane@doe.com/myfile
//   - /Users/jane@doe.com/myfile
//   - /Shared/project/myfile
//
// Not workspace paths:
//
//   - myfile.txt
//   - ./myfile.txt
//   - ../myfile.txt
//   - /foo/bar/myfile.txt
func IsWorkspacePath(path string) bool {
	for _, prefix := range []string{"/Workspace/", "/Users/", "/Shared/"} {
		if strings.HasPrefix(path, prefix) {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// IsWorkspaceLibrary returns true if the specified library refers to a workspace path.
|
||||||
|
func IsWorkspaceLibrary(library *compute.Library) bool {
|
||||||
|
path := libraryPath(library)
|
||||||
|
if path == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return IsWorkspacePath(path)
|
||||||
|
}
|
|
@ -0,0 +1,33 @@
|
||||||
|
package libraries
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestIsWorkspacePath(t *testing.T) {
|
||||||
|
// Absolute paths with particular prefixes.
|
||||||
|
assert.True(t, IsWorkspacePath("/Workspace/path/to/package"))
|
||||||
|
assert.True(t, IsWorkspacePath("/Users/path/to/package"))
|
||||||
|
assert.True(t, IsWorkspacePath("/Shared/path/to/package"))
|
||||||
|
|
||||||
|
// Relative paths.
|
||||||
|
assert.False(t, IsWorkspacePath("myfile.txt"))
|
||||||
|
assert.False(t, IsWorkspacePath("./myfile.txt"))
|
||||||
|
assert.False(t, IsWorkspacePath("../myfile.txt"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsWorkspaceLibrary(t *testing.T) {
|
||||||
|
// Workspace paths.
|
||||||
|
assert.True(t, IsWorkspaceLibrary(&compute.Library{Whl: "/Workspace/path/to/file.whl"}))
|
||||||
|
|
||||||
|
// Non-workspace paths.
|
||||||
|
assert.False(t, IsWorkspaceLibrary(&compute.Library{Whl: "./file.whl"}))
|
||||||
|
assert.False(t, IsWorkspaceLibrary(&compute.Library{Jar: "../target/some.jar"}))
|
||||||
|
assert.False(t, IsWorkspaceLibrary(&compute.Library{Jar: "s3:/bucket/path/some.jar"}))
|
||||||
|
|
||||||
|
// Empty.
|
||||||
|
assert.False(t, IsWorkspaceLibrary(&compute.Library{}))
|
||||||
|
}
|
|
@ -84,5 +84,4 @@ func TestBundlePythonWheelBuildNoBuildJustUpload(t *testing.T) {
|
||||||
"package",
|
"package",
|
||||||
"my_test_code-0.0.1-py3-none-any.whl",
|
"my_test_code-0.0.1-py3-none-any.whl",
|
||||||
))
|
))
|
||||||
require.True(t, artifact.Files[0].NeedsUpload())
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
package bundle
|
package bundle
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
@ -11,9 +10,11 @@ import (
|
||||||
"github.com/databricks/cli/bundle"
|
"github.com/databricks/cli/bundle"
|
||||||
"github.com/databricks/cli/bundle/artifacts"
|
"github.com/databricks/cli/bundle/artifacts"
|
||||||
"github.com/databricks/cli/bundle/config"
|
"github.com/databricks/cli/bundle/config"
|
||||||
|
"github.com/databricks/cli/bundle/config/resources"
|
||||||
"github.com/databricks/cli/internal"
|
"github.com/databricks/cli/internal"
|
||||||
"github.com/databricks/databricks-sdk-go"
|
"github.com/databricks/cli/internal/acc"
|
||||||
"github.com/databricks/databricks-sdk-go/service/compute"
|
"github.com/databricks/databricks-sdk-go/service/compute"
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -26,8 +27,8 @@ func touchEmptyFile(t *testing.T, path string) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) {
|
func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) {
|
||||||
t.Log(internal.GetEnvOrSkipTest(t, "CLOUD_ENV"))
|
ctx, wt := acc.WorkspaceTest(t)
|
||||||
|
w := wt.W
|
||||||
dir := t.TempDir()
|
dir := t.TempDir()
|
||||||
whlPath := filepath.Join(dir, "dist", "test.whl")
|
whlPath := filepath.Join(dir, "dist", "test.whl")
|
||||||
touchEmptyFile(t, whlPath)
|
touchEmptyFile(t, whlPath)
|
||||||
|
@ -37,14 +38,10 @@ func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) {
|
||||||
Files: []config.ArtifactFile{
|
Files: []config.ArtifactFile{
|
||||||
{
|
{
|
||||||
Source: whlPath,
|
Source: whlPath,
|
||||||
Libraries: []*compute.Library{
|
|
||||||
{Whl: "dist\\test.whl"},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
w := databricks.Must(databricks.NewWorkspaceClient())
|
|
||||||
wsDir := internal.TemporaryWorkspaceDir(t, w)
|
wsDir := internal.TemporaryWorkspaceDir(t, w)
|
||||||
|
|
||||||
b := &bundle.Bundle{
|
b := &bundle.Bundle{
|
||||||
|
@ -59,11 +56,33 @@ func TestAccUploadArtifactFileToCorrectRemotePath(t *testing.T) {
|
||||||
Artifacts: config.Artifacts{
|
Artifacts: config.Artifacts{
|
||||||
"test": artifact,
|
"test": artifact,
|
||||||
},
|
},
|
||||||
|
Resources: config.Resources{
|
||||||
|
Jobs: map[string]*resources.Job{
|
||||||
|
"test": {
|
||||||
|
JobSettings: &jobs.JobSettings{
|
||||||
|
Tasks: []jobs.Task{
|
||||||
|
{
|
||||||
|
Libraries: []compute.Library{
|
||||||
|
{
|
||||||
|
Whl: "dist/test.whl",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
err := bundle.Apply(context.Background(), b, artifacts.BasicUpload("test"))
|
err := bundle.Apply(ctx, b, artifacts.BasicUpload("test"))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
// The remote path attribute on the artifact file should have been set.
|
||||||
require.Regexp(t, regexp.MustCompile(path.Join(regexp.QuoteMeta(wsDir), `.internal/test\.whl`)), artifact.Files[0].RemotePath)
|
require.Regexp(t, regexp.MustCompile(path.Join(regexp.QuoteMeta(wsDir), `.internal/test\.whl`)), artifact.Files[0].RemotePath)
|
||||||
require.Regexp(t, regexp.MustCompile(path.Join("/Workspace", regexp.QuoteMeta(wsDir), `.internal/test\.whl`)), artifact.Files[0].Libraries[0].Whl)
|
|
||||||
|
// The task library path should have been updated to the remote path.
|
||||||
|
lib := b.Config.Resources.Jobs["test"].JobSettings.Tasks[0].Libraries[0]
|
||||||
|
require.Regexp(t, regexp.MustCompile(path.Join("/Workspace", regexp.QuoteMeta(wsDir), `.internal/test\.whl`)), lib.Whl)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue