From 75d516939bc1314da2ddb51160e7875de0da64a0 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Tue, 21 Mar 2023 18:13:16 +0100 Subject: [PATCH] Error out if notebook file does not exist locally (#261) Adds check for whether file exists locally case 1: local (relative) file does not exist ``` foo: name: "[job-output] test-job by shreyas" tasks: - task_key: my_notebook_task existing_cluster_id: *** notebook_task: notebook_path: "./doesnotexist" ``` output: ``` shreyas.goenka@THW32HFW6T job-output % bricks bundle deploy Error: notebook ./doesnotexist not found. Error: open /Users/shreyas.goenka/projects/job-output/doesnotexist: no such file or directory ``` case 2: remote (absolute) file does not exist ``` foo: name: "[job-output] test-job by shreyas" tasks: - task_key: my_notebook_task existing_cluster_id: *** notebook_task: notebook_path: "/Users/shreyas.goenka@databricks.com/doesnotexist" ``` output: ``` shreyas.goenka@THW32HFW6T job-output % bricks bundle deploy shreyas.goenka@THW32HFW6T job-output % bricks bundle run foo Error: failed to reach TERMINATED or SKIPPED, got INTERNAL_ERROR: Task my_notebook_task failed with message: Notebook not found: /Users/shreyas.goenka@databricks.com/doesnotexist. This caused all downstream tasks to get skipped. ``` case 3: remote exists Successful deploy and run --- .../mutator/translate_notebook_paths.go | 12 ++-- .../mutator/translate_notebook_paths_test.go | 64 +++++++++++++++++-- libs/notebook/detect.go | 17 ++--- libs/notebook/detect_test.go | 10 ++- libs/notebook/testdata/no_extension | 1 + .../testdata/unknown_extension.foobar | 1 + 6 files changed, 86 insertions(+), 19 deletions(-) create mode 100644 libs/notebook/testdata/no_extension create mode 100644 libs/notebook/testdata/unknown_extension.foobar diff --git a/bundle/config/mutator/translate_notebook_paths.go b/bundle/config/mutator/translate_notebook_paths.go index 0e629d3d..8f1d65be 100644 --- a/bundle/config/mutator/translate_notebook_paths.go +++ b/bundle/config/mutator/translate_notebook_paths.go @@ -26,6 +26,10 @@ func (m *translateNotebookPaths) Name() string { } func (m *translateNotebookPaths) rewritePath(b *bundle.Bundle, p *string) error { + // We assume absolute paths point to a location in the workspace + if path.IsAbs(*p) { + return nil + } relPath := path.Clean(*p) if interp, ok := m.seen[relPath]; ok { *p = interp @@ -34,14 +38,12 @@ func (m *translateNotebookPaths) rewritePath(b *bundle.Bundle, p *string) error absPath := filepath.Join(b.Config.Path, relPath) nb, _, err := notebook.Detect(absPath) + if os.IsNotExist(err) { + return fmt.Errorf("notebook %s not found: %w", *p, err) + } if err != nil { - // Ignore if this file doesn't exist. Maybe it's an absolute workspace path? - if os.IsNotExist(err) { - return nil - } return fmt.Errorf("unable to determine if %s is a notebook: %w", relPath, err) } - if !nb { return fmt.Errorf("file at %s is not a notebook", relPath) } diff --git a/bundle/config/mutator/translate_notebook_paths_test.go b/bundle/config/mutator/translate_notebook_paths_test.go index e10222b8..1a9f18f8 100644 --- a/bundle/config/mutator/translate_notebook_paths_test.go +++ b/bundle/config/mutator/translate_notebook_paths_test.go @@ -43,7 +43,7 @@ func TestNotebookPaths(t *testing.T) { }, { NotebookTask: &jobs.NotebookTask{ - NotebookPath: "./doesnt_exist.py", + NotebookPath: "/Users/jane.doe@databricks.com/doesnt_exist.py", }, }, { @@ -71,7 +71,7 @@ func TestNotebookPaths(t *testing.T) { }, { Notebook: &pipelines.NotebookLibrary{ - Path: "./doesnt_exist.py", + Path: "/Users/jane.doe@databricks.com/doesnt_exist.py", }, }, { @@ -101,7 +101,7 @@ func TestNotebookPaths(t *testing.T) { ) assert.Equal( t, - "./doesnt_exist.py", + "/Users/jane.doe@databricks.com/doesnt_exist.py", bundle.Config.Resources.Jobs["job"].Tasks[1].NotebookTask.NotebookPath, ) assert.Equal( @@ -118,7 +118,7 @@ func TestNotebookPaths(t *testing.T) { ) assert.Equal( t, - "./doesnt_exist.py", + "/Users/jane.doe@databricks.com/doesnt_exist.py", bundle.Config.Resources.Pipelines["pipeline"].Libraries[1].Notebook.Path, ) assert.Equal( @@ -127,3 +127,59 @@ func TestNotebookPaths(t *testing.T) { bundle.Config.Resources.Pipelines["pipeline"].Libraries[2].Notebook.Path, ) } + +func TestJobNotebookDoesNotExistError(t *testing.T) { + dir := t.TempDir() + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Resources: config.Resources{ + Jobs: map[string]*resources.Job{ + "job": { + JobSettings: &jobs.JobSettings{ + Tasks: []jobs.JobTaskSettings{ + { + NotebookTask: &jobs.NotebookTask{ + NotebookPath: "./doesnt_exist.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + _, err := mutator.TranslateNotebookPaths().Apply(context.Background(), bundle) + assert.ErrorContains(t, err, "notebook ./doesnt_exist.py not found") +} + +func TestPipelineNotebookDoesNotExistError(t *testing.T) { + dir := t.TempDir() + + bundle := &bundle.Bundle{ + Config: config.Root{ + Path: dir, + Resources: config.Resources{ + Pipelines: map[string]*resources.Pipeline{ + "pipeline": { + PipelineSpec: &pipelines.PipelineSpec{ + Libraries: []pipelines.PipelineLibrary{ + { + Notebook: &pipelines.NotebookLibrary{ + Path: "./doesnt_exist.py", + }, + }, + }, + }, + }, + }, + }, + }, + } + + _, err := mutator.TranslateNotebookPaths().Apply(context.Background(), bundle) + assert.ErrorContains(t, err, "notebook ./doesnt_exist.py not found") +} diff --git a/libs/notebook/detect.go b/libs/notebook/detect.go index ad751fa0..17685f3b 100644 --- a/libs/notebook/detect.go +++ b/libs/notebook/detect.go @@ -39,6 +39,14 @@ func readHeader(path string) ([]byte, error) { func Detect(path string) (notebook bool, language workspace.Language, err error) { header := "" + buf, err := readHeader(path) + if err != nil { + return false, "", err + } + scanner := bufio.NewScanner(bytes.NewReader(buf)) + scanner.Scan() + fileHeader := scanner.Text() + // Determine which header to expect based on filename extension. ext := strings.ToLower(filepath.Ext(path)) switch ext { @@ -60,14 +68,7 @@ func Detect(path string) (notebook bool, language workspace.Language, err error) return false, "", nil } - buf, err := readHeader(path) - if err != nil { - return false, "", err - } - - scanner := bufio.NewScanner(bytes.NewReader(buf)) - scanner.Scan() - if scanner.Text() != header { + if fileHeader != header { return false, "", nil } diff --git a/libs/notebook/detect_test.go b/libs/notebook/detect_test.go index 74c13e70..5d3aa8a8 100644 --- a/libs/notebook/detect_test.go +++ b/libs/notebook/detect_test.go @@ -49,13 +49,19 @@ func TestDetectCallsDetectJupyter(t *testing.T) { } func TestDetectUnknownExtension(t *testing.T) { - nb, _, err := Detect("./testdata/doesntexist.foobar") + _, _, err := Detect("./testdata/doesntexist.foobar") + assert.True(t, os.IsNotExist(err)) + + nb, _, err := Detect("./testdata/unknown_extension.foobar") require.NoError(t, err) assert.False(t, nb) } func TestDetectNoExtension(t *testing.T) { - nb, _, err := Detect("./testdata/doesntexist") + _, _, err := Detect("./testdata/doesntexist") + assert.True(t, os.IsNotExist(err)) + + nb, _, err := Detect("./testdata/no_extension") require.NoError(t, err) assert.False(t, nb) } diff --git a/libs/notebook/testdata/no_extension b/libs/notebook/testdata/no_extension new file mode 100644 index 00000000..1645e04b --- /dev/null +++ b/libs/notebook/testdata/no_extension @@ -0,0 +1 @@ +# Databricks notebook source diff --git a/libs/notebook/testdata/unknown_extension.foobar b/libs/notebook/testdata/unknown_extension.foobar new file mode 100644 index 00000000..1645e04b --- /dev/null +++ b/libs/notebook/testdata/unknown_extension.foobar @@ -0,0 +1 @@ +# Databricks notebook source