From f00488d81d953068ba04c88a97e8d1055c27c309 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Wed, 12 Jul 2023 14:25:00 +0200 Subject: [PATCH] Disallow notebooks in paths where files are expected (#573) ## Changes Uploading a notebook strips it's file extension. This PR returns an error if a notebook is specified where a file is expected. For example: A spark python task in a job or `libraries.file.path` DLT library (where instead `libraries.notebook.path` should be used This PR also adds test coverage for the opposite case, when files are not notebooks where notebooks are expected. ## Tests Integration tests and manually --- bundle/config/mutator/translate_paths.go | 39 ++++- bundle/config/mutator/translate_paths_test.go | 140 ++++++++++++++++++ 2 files changed, 175 insertions(+), 4 deletions(-) diff --git a/bundle/config/mutator/translate_paths.go b/bundle/config/mutator/translate_paths.go index a7ccb3e9..08f83986 100644 --- a/bundle/config/mutator/translate_paths.go +++ b/bundle/config/mutator/translate_paths.go @@ -2,6 +2,7 @@ package mutator import ( "context" + "errors" "fmt" "os" "path" @@ -14,6 +15,22 @@ import ( "github.com/databricks/databricks-sdk-go/service/pipelines" ) +type ErrIsNotebook struct { + path string +} + +func (err ErrIsNotebook) Error() string { + return fmt.Sprintf("file at %s is a notebook", err.path) +} + +type ErrIsNotNotebook struct { + path string +} + +func (err ErrIsNotNotebook) Error() string { + return fmt.Sprintf("file at %s is not a notebook", err.path) +} + type translatePaths struct { seen map[string]string } @@ -86,7 +103,7 @@ func (m *translatePaths) translateNotebookPath(literal, localPath, remotePath st return "", fmt.Errorf("unable to determine if %s is a notebook: %w", localPath, err) } if !nb { - return "", fmt.Errorf("file at %s is not a notebook", localPath) + return "", ErrIsNotNotebook{localPath} } // Upon import, notebooks are stripped of their extension. @@ -94,14 +111,16 @@ func (m *translatePaths) translateNotebookPath(literal, localPath, remotePath st } func (m *translatePaths) translateFilePath(literal, localPath, remotePath string) (string, error) { - _, err := os.Stat(localPath) + nb, _, err := notebook.Detect(localPath) if os.IsNotExist(err) { return "", fmt.Errorf("file %s not found", literal) } if err != nil { - return "", fmt.Errorf("unable to access %s: %w", localPath, err) + return "", fmt.Errorf("unable to determine if %s is not a notebook: %w", localPath, err) + } + if nb { + return "", ErrIsNotebook{localPath} } - return remotePath, nil } @@ -110,6 +129,9 @@ func (m *translatePaths) translateJobTask(dir string, b *bundle.Bundle, task *jo if task.NotebookTask != nil { err = m.rewritePath(dir, b, &task.NotebookTask.NotebookPath, m.translateNotebookPath) + if target := (&ErrIsNotNotebook{}); errors.As(err, target) { + return fmt.Errorf(`expected a notebook for "tasks.notebook_task.notebook_path" but got a file: %w`, target) + } if err != nil { return err } @@ -117,6 +139,9 @@ func (m *translatePaths) translateJobTask(dir string, b *bundle.Bundle, task *jo if task.SparkPythonTask != nil { err = m.rewritePath(dir, b, &task.SparkPythonTask.PythonFile, m.translateFilePath) + if target := (&ErrIsNotebook{}); errors.As(err, target) { + return fmt.Errorf(`expected a file for "tasks.spark_python_task.python_file" but got a notebook: %w`, target) + } if err != nil { return err } @@ -130,6 +155,9 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle, if library.Notebook != nil { err = m.rewritePath(dir, b, &library.Notebook.Path, m.translateNotebookPath) + if target := (&ErrIsNotNotebook{}); errors.As(err, target) { + return fmt.Errorf(`expected a notebook for "libraries.notebook.path" but got a file: %w`, target) + } if err != nil { return err } @@ -137,6 +165,9 @@ func (m *translatePaths) translatePipelineLibrary(dir string, b *bundle.Bundle, if library.File != nil { err = m.rewritePath(dir, b, &library.File.Path, m.translateFilePath) + if target := (&ErrIsNotebook{}); errors.As(err, target) { + return fmt.Errorf(`expected a file for "libraries.file.path" but got a notebook: %w`, target) + } if err != nil { return err } diff --git a/bundle/config/mutator/translate_paths_test.go b/bundle/config/mutator/translate_paths_test.go index 1bcb8b1b..b87f4f67 100644 --- a/bundle/config/mutator/translate_paths_test.go +++ b/bundle/config/mutator/translate_paths_test.go @@ -455,3 +455,143 @@ func TestPipelineFileDoesNotExistError(t *testing.T) { err := mutator.TranslatePaths().Apply(context.Background(), bundle) assert.EqualError(t, err, "file ./doesnt_exist.py not found") } + +func TestJobSparkPythonTaskWithNotebookSourceError(t *testing.T) { + dir := t.TempDir() + touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Workspace: config.Workspace{ + FilesPath: "/bundle", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job": { + Paths: resources.Paths{ + ConfigFilePath: filepath.Join(dir, "resource.yml"), + }, + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + SparkPythonTask: &jobs.SparkPythonTask{ + PythonFile: "./my_notebook.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + err := mutator.TranslatePaths().Apply(context.Background(), bundle) + assert.ErrorContains(t, err, `expected a file for "tasks.spark_python_task.python_file" but got a notebook`) +} + +func TestJobNotebookTaskWithFileSourceError(t *testing.T) { + dir := t.TempDir() + touchEmptyFile(t, filepath.Join(dir, "my_file.py")) + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Workspace: config.Workspace{ + FilesPath: "/bundle", + }, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job": { + Paths: resources.Paths{ + ConfigFilePath: filepath.Join(dir, "resource.yml"), + }, + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.Task{ + { + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "./my_file.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + err := mutator.TranslatePaths().Apply(context.Background(), bundle) + assert.ErrorContains(t, err, `expected a notebook for "tasks.notebook_task.notebook_path" but got a file`) +} + +func TestPipelineNotebookLibraryWithFileSourceError(t *testing.T) { + dir := t.TempDir() + touchEmptyFile(t, filepath.Join(dir, "my_file.py")) + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Workspace: config.Workspace{ + FilesPath: "/bundle", + }, + Resources: config.Resources{ + Pipelines: map[string]*resources.Pipeline{ + "pipeline": { + Paths: resources.Paths{ + ConfigFilePath: filepath.Join(dir, "resource.yml"), + }, + PipelineSpec: &pipelines.PipelineSpec{ + Libraries: []pipelines.PipelineLibrary{ + { + Notebook: &pipelines.NotebookLibrary{ + Path: "./my_file.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + err := mutator.TranslatePaths().Apply(context.Background(), bundle) + assert.ErrorContains(t, err, `expected a notebook for "libraries.notebook.path" but got a file`) +} + +func TestPipelineFileLibraryWithNotebookSourceError(t *testing.T) { + dir := t.TempDir() + touchNotebookFile(t, filepath.Join(dir, "my_notebook.py")) + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Workspace: config.Workspace{ + FilesPath: "/bundle", + }, + Resources: config.Resources{ + Pipelines: map[string]*resources.Pipeline{ + "pipeline": { + Paths: resources.Paths{ + ConfigFilePath: filepath.Join(dir, "resource.yml"), + }, + PipelineSpec: &pipelines.PipelineSpec{ + Libraries: []pipelines.PipelineLibrary{ + { + File: &pipelines.FileLibrary{ + Path: "./my_notebook.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + err := mutator.TranslatePaths().Apply(context.Background(), bundle) + assert.ErrorContains(t, err, `expected a file for "libraries.file.path" but got a notebook`) +}