Disallow notebooks in paths where files are expected (#573)

## Changes
Uploading a notebook strips it's file extension. This PR returns an
error if a notebook is specified where a file is expected. For example:
A spark python task in a job or `libraries.file.path` DLT library (where
instead `libraries.notebook.path` should be used

This PR also adds test coverage for the opposite case, when files are
not notebooks where notebooks are expected.

## Tests
Integration tests and manually
This commit is contained in:
shreyas-goenka 2023-07-12 14:25:00 +02:00 committed by GitHub
parent 650fb0e8b6
commit f00488d81d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 175 additions and 4 deletions

View File

@ -2,6 +2,7 @@ package mutator
import (
"context"
"errors"
"fmt"
"os"
"path"
@ -14,6 +15,22 @@ import (
"github.com/databricks/databricks-sdk-go/service/pipelines"
)
type ErrIsNotebook struct {
path string
}
func (err ErrIsNotebook) Error() string {
return fmt.Sprintf("file at %s is a notebook", err.path)
}
type ErrIsNotNotebook struct {
path string
}
func (err ErrIsNotNotebook) Error() string {
return fmt.Sprintf("file at %s is not a notebook", err.path)
}
type translatePaths struct {
seen map[string]string
}
@ -86,7 +103,7 @@ func (m *translatePaths) translateNotebookPath(literal, localPath, remotePath st
return "", fmt.Errorf("unable to determine if %s is a notebook: %w", localPath, err)
}
if !nb {
return "", fmt.Errorf("file at %s is not a notebook", localPath)
return "", ErrIsNotNotebook{localPath}
}
// Upon import, notebooks are stripped of their extension.
@ -94,14 +111,16 @@ func (m *translatePaths) translateNotebookPath(literal, localPath, remotePath st
}
func (m *translatePaths) translateFilePath(literal, localPath, remotePath string) (string, error) {
_, err := os.Stat(localPath)
nb, _, err := notebook.Detect(localPath)
if os.IsNotExist(err) {
return "", fmt.Errorf("file %s not found", literal)
}
if err != nil {
return "", fmt.Errorf("unable to access %s: %w", localPath, err)
return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", localPath, err)
}
if nb {
return "", ErrIsNotebook{localPath}
}
return remotePath, nil
}
@ -110,6 +129,9 @@ func (m *translatePaths) translateJobTask(dir string, b *bundle.Bundle, task *jo
if task.NotebookTask != nil {
err = m.rewritePath(dir, b, &task.NotebookTask.NotebookPath, m.translateNotebookPath)
if target := (&ErrIsNotNotebook{}); errors.As(err, target) {
return fmt.Errorf(`expected a notebook for "tasks.notebook_task.notebook_path" but got a file: %w`, target)
}
if err != nil {
return err
}
@ -117,6 +139,9 @@ func (m *translatePaths) translateJobTask(dir string, b *bundle.Bundle, task *jo
if task.SparkPythonTask != nil {
err = m.rewritePath(dir, b, &task.SparkPythonTask.PythonFile, m.translateFilePath)
if target := (&ErrIsNotebook{}); errors.As(err, target) {
return fmt.Errorf(`expected a file for "tasks.spark_python_task.python_file" but got a notebook: %w`, target)
}
if err != nil {
return err
}
@ -130,6 +155,9 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle,
if library.Notebook != nil {
err = m.rewritePath(dir, b, &library.Notebook.Path, m.translateNotebookPath)
if target := (&ErrIsNotNotebook{}); errors.As(err, target) {
return fmt.Errorf(`expected a notebook for "libraries.notebook.path" but got a file: %w`, target)
}
if err != nil {
return err
}
@ -137,6 +165,9 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle,
if library.File != nil {
err = m.rewritePath(dir, b, &library.File.Path, m.translateFilePath)
if target := (&ErrIsNotebook{}); errors.As(err, target) {
return fmt.Errorf(`expected a file for "libraries.file.path" but got a notebook: %w`, target)
}
if err != nil {
return err
}

View File

@ -455,3 +455,143 @@ func TestPipelineFileDoesNotExistError(t *testing.T) {
err := mutator.TranslatePaths().Apply(context.Background(), bundle)
assert.EqualError(t, err, "file ./doesnt_exist.py not found")
}
func TestJobSparkPythonTaskWithNotebookSourceError(t *testing.T) {
dir := t.TempDir()
touchNotebookFile(t, filepath.Join(dir, "my_notebook.py"))
bundle := &bundle.Bundle{
Config: config.Root{
Path: dir,
Workspace: config.Workspace{
FilesPath: "/bundle",
},
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job": {
Paths: resources.Paths{
ConfigFilePath: filepath.Join(dir, "resource.yml"),
},
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
SparkPythonTask: &jobs.SparkPythonTask{
PythonFile: "./my_notebook.py",
},
},
},
},
},
},
},
},
}
err := mutator.TranslatePaths().Apply(context.Background(), bundle)
assert.ErrorContains(t, err, `expected a file for "tasks.spark_python_task.python_file" but got a notebook`)
}
func TestJobNotebookTaskWithFileSourceError(t *testing.T) {
dir := t.TempDir()
touchEmptyFile(t, filepath.Join(dir, "my_file.py"))
bundle := &bundle.Bundle{
Config: config.Root{
Path: dir,
Workspace: config.Workspace{
FilesPath: "/bundle",
},
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job": {
Paths: resources.Paths{
ConfigFilePath: filepath.Join(dir, "resource.yml"),
},
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
NotebookTask: &jobs.NotebookTask{
NotebookPath: "./my_file.py",
},
},
},
},
},
},
},
},
}
err := mutator.TranslatePaths().Apply(context.Background(), bundle)
assert.ErrorContains(t, err, `expected a notebook for "tasks.notebook_task.notebook_path" but got a file`)
}
func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) {
dir := t.TempDir()
touchEmptyFile(t, filepath.Join(dir, "my_file.py"))
bundle := &bundle.Bundle{
Config: config.Root{
Path: dir,
Workspace: config.Workspace{
FilesPath: "/bundle",
},
Resources: config.Resources{
Pipelines: map[string]*resources.Pipeline{
"pipeline": {
Paths: resources.Paths{
ConfigFilePath: filepath.Join(dir, "resource.yml"),
},
PipelineSpec: &pipelines.PipelineSpec{
Libraries: []pipelines.PipelineLibrary{
{
Notebook: &pipelines.NotebookLibrary{
Path: "./my_file.py",
},
},
},
},
},
},
},
},
}
err := mutator.TranslatePaths().Apply(context.Background(), bundle)
assert.ErrorContains(t, err, `expected a notebook for "libraries.notebook.path" but got a file`)
}
func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) {
dir := t.TempDir()
touchNotebookFile(t, filepath.Join(dir, "my_notebook.py"))
bundle := &bundle.Bundle{
Config: config.Root{
Path: dir,
Workspace: config.Workspace{
FilesPath: "/bundle",
},
Resources: config.Resources{
Pipelines: map[string]*resources.Pipeline{
"pipeline": {
Paths: resources.Paths{
ConfigFilePath: filepath.Join(dir, "resource.yml"),
},
PipelineSpec: &pipelines.PipelineSpec{
Libraries: []pipelines.PipelineLibrary{
{
File: &pipelines.FileLibrary{
Path: "./my_notebook.py",
},
},
},
},
},
},
},
},
}
err := mutator.TranslatePaths().Apply(context.Background(), bundle)
assert.ErrorContains(t, err, `expected a file for "libraries.file.path" but got a notebook`)
}