mirror of https://github.com/databricks/cli.git
Disallow notebooks in paths where files are expected (#573)
## Changes Uploading a notebook strips it's file extension. This PR returns an error if a notebook is specified where a file is expected. For example: A spark python task in a job or `libraries.file.path` DLT library (where instead `libraries.notebook.path` should be used This PR also adds test coverage for the opposite case, when files are not notebooks where notebooks are expected. ## Tests Integration tests and manually
This commit is contained in:
parent
650fb0e8b6
commit
f00488d81d
|
@ -2,6 +2,7 @@ package mutator
|
|||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path"
|
||||
|
@ -14,6 +15,22 @@ import (
|
|||
"github.com/databricks/databricks-sdk-go/service/pipelines"
|
||||
)
|
||||
|
||||
type ErrIsNotebook struct {
|
||||
path string
|
||||
}
|
||||
|
||||
func (err ErrIsNotebook) Error() string {
|
||||
return fmt.Sprintf("file at %s is a notebook", err.path)
|
||||
}
|
||||
|
||||
type ErrIsNotNotebook struct {
|
||||
path string
|
||||
}
|
||||
|
||||
func (err ErrIsNotNotebook) Error() string {
|
||||
return fmt.Sprintf("file at %s is not a notebook", err.path)
|
||||
}
|
||||
|
||||
type translatePaths struct {
|
||||
seen map[string]string
|
||||
}
|
||||
|
@ -86,7 +103,7 @@ func (m *translatePaths) translateNotebookPath(literal, localPath, remotePath st
|
|||
return "", fmt.Errorf("unable to determine if %s is a notebook: %w", localPath, err)
|
||||
}
|
||||
if !nb {
|
||||
return "", fmt.Errorf("file at %s is not a notebook", localPath)
|
||||
return "", ErrIsNotNotebook{localPath}
|
||||
}
|
||||
|
||||
// Upon import, notebooks are stripped of their extension.
|
||||
|
@ -94,14 +111,16 @@ func (m *translatePaths) translateNotebookPath(literal, localPath, remotePath st
|
|||
}
|
||||
|
||||
func (m *translatePaths) translateFilePath(literal, localPath, remotePath string) (string, error) {
|
||||
_, err := os.Stat(localPath)
|
||||
nb, _, err := notebook.Detect(localPath)
|
||||
if os.IsNotExist(err) {
|
||||
return "", fmt.Errorf("file %s not found", literal)
|
||||
}
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to access %s: %w", localPath, err)
|
||||
return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", localPath, err)
|
||||
}
|
||||
if nb {
|
||||
return "", ErrIsNotebook{localPath}
|
||||
}
|
||||
|
||||
return remotePath, nil
|
||||
}
|
||||
|
||||
|
@ -110,6 +129,9 @@ func (m *translatePaths) translateJobTask(dir string, b *bundle.Bundle, task *jo
|
|||
|
||||
if task.NotebookTask != nil {
|
||||
err = m.rewritePath(dir, b, &task.NotebookTask.NotebookPath, m.translateNotebookPath)
|
||||
if target := (&ErrIsNotNotebook{}); errors.As(err, target) {
|
||||
return fmt.Errorf(`expected a notebook for "tasks.notebook_task.notebook_path" but got a file: %w`, target)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -117,6 +139,9 @@ func (m *translatePaths) translateJobTask(dir string, b *bundle.Bundle, task *jo
|
|||
|
||||
if task.SparkPythonTask != nil {
|
||||
err = m.rewritePath(dir, b, &task.SparkPythonTask.PythonFile, m.translateFilePath)
|
||||
if target := (&ErrIsNotebook{}); errors.As(err, target) {
|
||||
return fmt.Errorf(`expected a file for "tasks.spark_python_task.python_file" but got a notebook: %w`, target)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -130,6 +155,9 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle,
|
|||
|
||||
if library.Notebook != nil {
|
||||
err = m.rewritePath(dir, b, &library.Notebook.Path, m.translateNotebookPath)
|
||||
if target := (&ErrIsNotNotebook{}); errors.As(err, target) {
|
||||
return fmt.Errorf(`expected a notebook for "libraries.notebook.path" but got a file: %w`, target)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
@ -137,6 +165,9 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle,
|
|||
|
||||
if library.File != nil {
|
||||
err = m.rewritePath(dir, b, &library.File.Path, m.translateFilePath)
|
||||
if target := (&ErrIsNotebook{}); errors.As(err, target) {
|
||||
return fmt.Errorf(`expected a file for "libraries.file.path" but got a notebook: %w`, target)
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
|
@ -455,3 +455,143 @@ func TestPipelineFileDoesNotExistError(t *testing.T) {
|
|||
err := mutator.TranslatePaths().Apply(context.Background(), bundle)
|
||||
assert.EqualError(t, err, "file ./doesnt_exist.py not found")
|
||||
}
|
||||
|
||||
func TestJobSparkPythonTaskWithNotebookSourceError(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
touchNotebookFile(t, filepath.Join(dir, "my_notebook.py"))
|
||||
|
||||
bundle := &bundle.Bundle{
|
||||
Config: config.Root{
|
||||
Path: dir,
|
||||
Workspace: config.Workspace{
|
||||
FilesPath: "/bundle",
|
||||
},
|
||||
Resources: config.Resources{
|
||||
Jobs: map[string]*resources.Job{
|
||||
"job": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: filepath.Join(dir, "resource.yml"),
|
||||
},
|
||||
JobSettings: &jobs.JobSettings{
|
||||
Tasks: []jobs.Task{
|
||||
{
|
||||
SparkPythonTask: &jobs.SparkPythonTask{
|
||||
PythonFile: "./my_notebook.py",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
err := mutator.TranslatePaths().Apply(context.Background(), bundle)
|
||||
assert.ErrorContains(t, err, `expected a file for "tasks.spark_python_task.python_file" but got a notebook`)
|
||||
}
|
||||
|
||||
func TestJobNotebookTaskWithFileSourceError(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
touchEmptyFile(t, filepath.Join(dir, "my_file.py"))
|
||||
|
||||
bundle := &bundle.Bundle{
|
||||
Config: config.Root{
|
||||
Path: dir,
|
||||
Workspace: config.Workspace{
|
||||
FilesPath: "/bundle",
|
||||
},
|
||||
Resources: config.Resources{
|
||||
Jobs: map[string]*resources.Job{
|
||||
"job": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: filepath.Join(dir, "resource.yml"),
|
||||
},
|
||||
JobSettings: &jobs.JobSettings{
|
||||
Tasks: []jobs.Task{
|
||||
{
|
||||
NotebookTask: &jobs.NotebookTask{
|
||||
NotebookPath: "./my_file.py",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
err := mutator.TranslatePaths().Apply(context.Background(), bundle)
|
||||
assert.ErrorContains(t, err, `expected a notebook for "tasks.notebook_task.notebook_path" but got a file`)
|
||||
}
|
||||
|
||||
func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
touchEmptyFile(t, filepath.Join(dir, "my_file.py"))
|
||||
|
||||
bundle := &bundle.Bundle{
|
||||
Config: config.Root{
|
||||
Path: dir,
|
||||
Workspace: config.Workspace{
|
||||
FilesPath: "/bundle",
|
||||
},
|
||||
Resources: config.Resources{
|
||||
Pipelines: map[string]*resources.Pipeline{
|
||||
"pipeline": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: filepath.Join(dir, "resource.yml"),
|
||||
},
|
||||
PipelineSpec: &pipelines.PipelineSpec{
|
||||
Libraries: []pipelines.PipelineLibrary{
|
||||
{
|
||||
Notebook: &pipelines.NotebookLibrary{
|
||||
Path: "./my_file.py",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
err := mutator.TranslatePaths().Apply(context.Background(), bundle)
|
||||
assert.ErrorContains(t, err, `expected a notebook for "libraries.notebook.path" but got a file`)
|
||||
}
|
||||
|
||||
func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
touchNotebookFile(t, filepath.Join(dir, "my_notebook.py"))
|
||||
|
||||
bundle := &bundle.Bundle{
|
||||
Config: config.Root{
|
||||
Path: dir,
|
||||
Workspace: config.Workspace{
|
||||
FilesPath: "/bundle",
|
||||
},
|
||||
Resources: config.Resources{
|
||||
Pipelines: map[string]*resources.Pipeline{
|
||||
"pipeline": {
|
||||
Paths: resources.Paths{
|
||||
ConfigFilePath: filepath.Join(dir, "resource.yml"),
|
||||
},
|
||||
PipelineSpec: &pipelines.PipelineSpec{
|
||||
Libraries: []pipelines.PipelineLibrary{
|
||||
{
|
||||
File: &pipelines.FileLibrary{
|
||||
Path: "./my_notebook.py",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
err := mutator.TranslatePaths().Apply(context.Background(), bundle)
|
||||
assert.ErrorContains(t, err, `expected a file for "libraries.file.path" but got a notebook`)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue