Error out if notebook file does not exist locally (#261)

Adds check for whether file exists locally

case 1: local (relative) file does not exist
```
    foo:
      name: "[job-output] test-job by shreyas"

      tasks:
        - task_key: my_notebook_task
          existing_cluster_id: ***
          notebook_task:
            notebook_path: "./doesnotexist"
```
output:
```
shreyas.goenka@THW32HFW6T job-output % bricks bundle deploy
Error: notebook ./doesnotexist not found. Error: open /Users/shreyas.goenka/projects/job-output/doesnotexist: no such file or directory
```


case 2: remote (absolute) file does not exist
```
    foo:
      name: "[job-output] test-job by shreyas"

      tasks:
        - task_key: my_notebook_task
          existing_cluster_id: ***
          notebook_task:
            notebook_path: "/Users/shreyas.goenka@databricks.com/doesnotexist"
```

output:
```
shreyas.goenka@THW32HFW6T job-output % bricks bundle deploy
shreyas.goenka@THW32HFW6T job-output % bricks bundle run foo
Error: failed to reach TERMINATED or SKIPPED, got INTERNAL_ERROR: Task my_notebook_task failed with message: Notebook not found: /Users/shreyas.goenka@databricks.com/doesnotexist. This caused all downstream tasks to get skipped.
```

case 3: remote exists
Successful deploy and run
This commit is contained in:
shreyas-goenka 2023-03-21 18:13:16 +01:00 committed by GitHub
parent 9100680162
commit 75d516939b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 86 additions and 19 deletions

View File

@ -26,6 +26,10 @@ func (m *translateNotebookPaths) Name() string {
}
func (m *translateNotebookPaths) rewritePath(b *bundle.Bundle, p *string) error {
// We assume absolute paths point to a location in the workspace
if path.IsAbs(*p) {
return nil
}
relPath := path.Clean(*p)
if interp, ok := m.seen[relPath]; ok {
*p = interp
@ -34,14 +38,12 @@ func (m *translateNotebookPaths) rewritePath(b *bundle.Bundle, p *string) error
absPath := filepath.Join(b.Config.Path, relPath)
nb, _, err := notebook.Detect(absPath)
if err != nil {
// Ignore if this file doesn't exist. Maybe it's an absolute workspace path?
if os.IsNotExist(err) {
return nil
return fmt.Errorf("notebook %s not found: %w", *p, err)
}
if err != nil {
return fmt.Errorf("unable to determine if %s is a notebook: %w", relPath, err)
}
if !nb {
return fmt.Errorf("file at %s is not a notebook", relPath)
}

View File

@ -43,7 +43,7 @@ func TestNotebookPaths(t *testing.T) {
},
{
NotebookTask: &jobs.NotebookTask{
NotebookPath: "./doesnt_exist.py",
NotebookPath: "/Users/jane.doe@databricks.com/doesnt_exist.py",
},
},
{
@ -71,7 +71,7 @@ func TestNotebookPaths(t *testing.T) {
},
{
Notebook: &pipelines.NotebookLibrary{
Path: "./doesnt_exist.py",
Path: "/Users/jane.doe@databricks.com/doesnt_exist.py",
},
},
{
@ -101,7 +101,7 @@ func TestNotebookPaths(t *testing.T) {
)
assert.Equal(
t,
"./doesnt_exist.py",
"/Users/jane.doe@databricks.com/doesnt_exist.py",
bundle.Config.Resources.Jobs["job"].Tasks[1].NotebookTask.NotebookPath,
)
assert.Equal(
@ -118,7 +118,7 @@ func TestNotebookPaths(t *testing.T) {
)
assert.Equal(
t,
"./doesnt_exist.py",
"/Users/jane.doe@databricks.com/doesnt_exist.py",
bundle.Config.Resources.Pipelines["pipeline"].Libraries[1].Notebook.Path,
)
assert.Equal(
@ -127,3 +127,59 @@ func TestNotebookPaths(t *testing.T) {
bundle.Config.Resources.Pipelines["pipeline"].Libraries[2].Notebook.Path,
)
}
func TestJobNotebookDoesNotExistError(t *testing.T) {
dir := t.TempDir()
bundle := &bundle.Bundle{
Config: config.Root{
Path: dir,
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"job": {
JobSettings: &jobs.JobSettings{
Tasks: []jobs.JobTaskSettings{
{
NotebookTask: &jobs.NotebookTask{
NotebookPath: "./doesnt_exist.py",
},
},
},
},
},
},
},
},
}
_, err := mutator.TranslateNotebookPaths().Apply(context.Background(), bundle)
assert.ErrorContains(t, err, "notebook ./doesnt_exist.py not found")
}
func TestPipelineNotebookDoesNotExistError(t *testing.T) {
dir := t.TempDir()
bundle := &bundle.Bundle{
Config: config.Root{
Path: dir,
Resources: config.Resources{
Pipelines: map[string]*resources.Pipeline{
"pipeline": {
PipelineSpec: &pipelines.PipelineSpec{
Libraries: []pipelines.PipelineLibrary{
{
Notebook: &pipelines.NotebookLibrary{
Path: "./doesnt_exist.py",
},
},
},
},
},
},
},
},
}
_, err := mutator.TranslateNotebookPaths().Apply(context.Background(), bundle)
assert.ErrorContains(t, err, "notebook ./doesnt_exist.py not found")
}

View File

@ -39,6 +39,14 @@ func readHeader(path string) ([]byte, error) {
func Detect(path string) (notebook bool, language workspace.Language, err error) {
header := ""
buf, err := readHeader(path)
if err != nil {
return false, "", err
}
scanner := bufio.NewScanner(bytes.NewReader(buf))
scanner.Scan()
fileHeader := scanner.Text()
// Determine which header to expect based on filename extension.
ext := strings.ToLower(filepath.Ext(path))
switch ext {
@ -60,14 +68,7 @@ func Detect(path string) (notebook bool, language workspace.Language, err error)
return false, "", nil
}
buf, err := readHeader(path)
if err != nil {
return false, "", err
}
scanner := bufio.NewScanner(bytes.NewReader(buf))
scanner.Scan()
if scanner.Text() != header {
if fileHeader != header {
return false, "", nil
}

View File

@ -49,13 +49,19 @@ func TestDetectCallsDetectJupyter(t *testing.T) {
}
func TestDetectUnknownExtension(t *testing.T) {
nb, _, err := Detect("./testdata/doesntexist.foobar")
_, _, err := Detect("./testdata/doesntexist.foobar")
assert.True(t, os.IsNotExist(err))
nb, _, err := Detect("./testdata/unknown_extension.foobar")
require.NoError(t, err)
assert.False(t, nb)
}
func TestDetectNoExtension(t *testing.T) {
nb, _, err := Detect("./testdata/doesntexist")
_, _, err := Detect("./testdata/doesntexist")
assert.True(t, os.IsNotExist(err))
nb, _, err := Detect("./testdata/no_extension")
require.NoError(t, err)
assert.False(t, nb)
}

1
libs/notebook/testdata/no_extension vendored Normal file
View File

@ -0,0 +1 @@
# Databricks notebook source

View File

@ -0,0 +1 @@
# Databricks notebook source