Add support for non-Python ipynb notebooks to DABs (#1827)

## Changes

### Background

The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL — that is, non-Python notebooks.
This now works for the `/import-file` API, which we leverage in the CLI.

Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.

### Problem this PR addresses

The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.

This implies that after this PR, DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. Non-Python notebooks for
DABs deployments from local machines already work since the platform-side
changes to the API landed; this PR just adds integration tests for
that bit of functionality.

Note: Any platform side changes we needed for the import API have
already been rolled out to production.

### Before

DABs deploys from local machines would work fine for non-Python
notebooks, but DABs deployments from DBR would not.

### After

DABs deploys both from local machines and DBR will work fine.

## Testing

I created the `.ipynb` notebook fixtures used in the integration tests
directly from the VSCode UI. This ensures high fidelity with how users
will create their non-Python notebooks locally. For Python notebooks
this is supported out of the box by VSCode, but for R and Scala
notebooks it required installing the Jupyter kernels for R and Scala on
my local machine and using them from VSCode.

For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.

### Discussion: Issues with configuring language at the cell level

The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
"cells": [{
    "metadata": {
        "vscode": {
            "languageId": "sql"
        }
    }
}]
```

Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
This commit is contained in:
shreyas-goenka 2024-11-14 03:09:51 +05:30 committed by GitHub
parent 25838ee0af
commit e1978fa429
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
14 changed files with 637 additions and 271 deletions

View File

@ -39,7 +39,7 @@ func (f filerTest) assertContents(ctx context.Context, name string, contents str
assert.Equal(f, contents, body.String()) assert.Equal(f, contents, body.String())
} }
func (f filerTest) assertContentsJupyter(ctx context.Context, name string) { func (f filerTest) assertContentsJupyter(ctx context.Context, name string, language string) {
reader, err := f.Read(ctx, name) reader, err := f.Read(ctx, name)
if !assert.NoError(f, err) { if !assert.NoError(f, err) {
return return
@ -62,6 +62,7 @@ func (f filerTest) assertContentsJupyter(ctx context.Context, name string) {
// Since a roundtrip to the workspace changes a Jupyter notebook's payload, // Since a roundtrip to the workspace changes a Jupyter notebook's payload,
// the best we can do is assert that the nbformat is correct. // the best we can do is assert that the nbformat is correct.
assert.EqualValues(f, 4, actual["nbformat"]) assert.EqualValues(f, 4, actual["nbformat"])
assert.Equal(f, language, actual["metadata"].(map[string]any)["language_info"].(map[string]any)["name"])
} }
func (f filerTest) assertNotExists(ctx context.Context, name string) { func (f filerTest) assertNotExists(ctx context.Context, name string) {
@ -360,146 +361,114 @@ func TestAccFilerReadDir(t *testing.T) {
} }
} }
var jupyterNotebookContent1 = ` func TestAccFilerWorkspaceNotebook(t *testing.T) {
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"Jupyter Notebook Version 1\")"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
`
var jupyterNotebookContent2 = `
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(\"Jupyter Notebook Version 2\")"
]
}
],
"metadata": {
"language_info": {
"name": "python"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
`
func TestAccFilerWorkspaceNotebookConflict(t *testing.T) {
t.Parallel() t.Parallel()
f, _ := setupWsfsFiler(t)
ctx := context.Background() ctx := context.Background()
var err error var err error
// Upload the notebooks tcases := []struct {
err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) name string
require.NoError(t, err) nameWithoutExt string
err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) content1 string
require.NoError(t, err) expected1 string
err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"first upload\"")) content2 string
require.NoError(t, err) expected2 string
err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))")) }{
require.NoError(t, err) {
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1)) name: "pyNb.py",
nameWithoutExt: "pyNb",
content1: "# Databricks notebook source\nprint('first upload')",
expected1: "# Databricks notebook source\nprint('first upload')",
content2: "# Databricks notebook source\nprint('second upload')",
expected2: "# Databricks notebook source\nprint('second upload')",
},
{
name: "rNb.r",
nameWithoutExt: "rNb",
content1: "# Databricks notebook source\nprint('first upload')",
expected1: "# Databricks notebook source\nprint('first upload')",
content2: "# Databricks notebook source\nprint('second upload')",
expected2: "# Databricks notebook source\nprint('second upload')",
},
{
name: "sqlNb.sql",
nameWithoutExt: "sqlNb",
content1: "-- Databricks notebook source\n SELECT \"first upload\"",
expected1: "-- Databricks notebook source\n SELECT \"first upload\"",
content2: "-- Databricks notebook source\n SELECT \"second upload\"",
expected2: "-- Databricks notebook source\n SELECT \"second upload\"",
},
{
name: "scalaNb.scala",
nameWithoutExt: "scalaNb",
content1: "// Databricks notebook source\n println(\"first upload\")",
expected1: "// Databricks notebook source\n println(\"first upload\")",
content2: "// Databricks notebook source\n println(\"second upload\")",
expected2: "// Databricks notebook source\n println(\"second upload\")",
},
{
name: "pythonJupyterNb.ipynb",
nameWithoutExt: "pythonJupyterNb",
content1: readFile(t, "testdata/notebooks/py1.ipynb"),
expected1: "# Databricks notebook source\nprint(1)",
content2: readFile(t, "testdata/notebooks/py2.ipynb"),
expected2: "# Databricks notebook source\nprint(2)",
},
{
name: "rJupyterNb.ipynb",
nameWithoutExt: "rJupyterNb",
content1: readFile(t, "testdata/notebooks/r1.ipynb"),
expected1: "# Databricks notebook source\nprint(1)",
content2: readFile(t, "testdata/notebooks/r2.ipynb"),
expected2: "# Databricks notebook source\nprint(2)",
},
{
name: "scalaJupyterNb.ipynb",
nameWithoutExt: "scalaJupyterNb",
content1: readFile(t, "testdata/notebooks/scala1.ipynb"),
expected1: "// Databricks notebook source\nprintln(1)",
content2: readFile(t, "testdata/notebooks/scala2.ipynb"),
expected2: "// Databricks notebook source\nprintln(2)",
},
{
name: "sqlJupyterNotebook.ipynb",
nameWithoutExt: "sqlJupyterNotebook",
content1: readFile(t, "testdata/notebooks/sql1.ipynb"),
expected1: "-- Databricks notebook source\nselect 1",
content2: readFile(t, "testdata/notebooks/sql2.ipynb"),
expected2: "-- Databricks notebook source\nselect 2",
},
}
for _, tc := range tcases {
f, _ := setupWsfsFiler(t)
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
// Upload the notebook
err = f.Write(ctx, tc.name, strings.NewReader(tc.content1))
require.NoError(t, err) require.NoError(t, err)
// Assert contents after initial upload // Assert contents after initial upload. Note that we expect the content
filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('first upload'))") // for jupyter notebooks to be of type source because the workspace files
filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))") // client always uses the source format to read notebooks from the workspace.
filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"") filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected1)
filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))")
filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")")
// Assert uploading a second time fails due to overwrite mode missing // Assert uploading a second time fails due to overwrite mode missing
err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))")) err = f.Write(ctx, tc.name, strings.NewReader(tc.content2))
assert.ErrorIs(t, err, fs.ErrExist) assert.ErrorIs(t, err, fs.ErrExist)
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/pyNb$`), err.Error()) assert.Regexp(t, regexp.MustCompile(`file already exists: .*/`+tc.nameWithoutExt+`$`), err.Error())
err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('second upload'))")) // Try uploading the notebook again with overwrite flag. This time it should succeed.
assert.ErrorIs(t, err, fs.ErrExist) err = f.Write(ctx, tc.name, strings.NewReader(tc.content2), filer.OverwriteIfExists)
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/rNb$`), err.Error()) require.NoError(t, err)
err = f.Write(ctx, "sqlNb.sql", strings.NewReader("# Databricks notebook source\n SELECT \"second upload\")")) // Assert contents after second upload
assert.ErrorIs(t, err, fs.ErrExist) filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected2)
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/sqlNb$`), err.Error()) })
err = f.Write(ctx, "scalaNb.scala", strings.NewReader("# Databricks notebook source\n println(\"second upload\"))"))
assert.ErrorIs(t, err, fs.ErrExist)
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/scalaNb$`), err.Error())
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2))
assert.ErrorIs(t, err, fs.ErrExist)
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/jupyterNb$`), err.Error())
} }
func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) {
t.Parallel()
f, _ := setupWsfsFiler(t)
ctx := context.Background()
var err error
// Upload notebooks
err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('first upload'))"))
require.NoError(t, err)
err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('first upload'))"))
require.NoError(t, err)
err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"first upload\""))
require.NoError(t, err)
err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))"))
require.NoError(t, err)
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1))
require.NoError(t, err)
// Assert contents after initial upload
filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('first upload'))")
filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))")
filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"")
filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))")
filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")")
// Upload notebooks a second time, overwriting the initial uplaods
err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))"), filer.OverwriteIfExists)
require.NoError(t, err)
err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('second upload'))"), filer.OverwriteIfExists)
require.NoError(t, err)
err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"second upload\""), filer.OverwriteIfExists)
require.NoError(t, err)
err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"second upload\"))"), filer.OverwriteIfExists)
require.NoError(t, err)
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2), filer.OverwriteIfExists)
require.NoError(t, err)
// Assert contents have been overwritten
filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('second upload'))")
filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('second upload'))")
filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"second upload\"")
filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"second upload\"))")
filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 2\")")
} }
func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) {
@ -515,11 +484,13 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) {
{"foo.r", "print('foo')"}, {"foo.r", "print('foo')"},
{"foo.scala", "println('foo')"}, {"foo.scala", "println('foo')"},
{"foo.sql", "SELECT 'foo'"}, {"foo.sql", "SELECT 'foo'"},
{"jupyterNb.ipynb", jupyterNotebookContent1}, {"py1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")},
{"jupyterNb2.ipynb", jupyterNotebookContent2},
{"pyNb.py", "# Databricks notebook source\nprint('first upload'))"}, {"pyNb.py", "# Databricks notebook source\nprint('first upload'))"},
{"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")},
{"rNb.r", "# Databricks notebook source\nprint('first upload'))"}, {"rNb.r", "# Databricks notebook source\nprint('first upload'))"},
{"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")},
{"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"}, {"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"},
{"sql1.ipynb", readFile(t, "testdata/notebooks/sql1.ipynb")},
{"sqlNb.sql", "-- Databricks notebook source\n SELECT \"first upload\""}, {"sqlNb.sql", "-- Databricks notebook source\n SELECT \"first upload\""},
} }
@ -554,11 +525,13 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) {
"foo.r", "foo.r",
"foo.scala", "foo.scala",
"foo.sql", "foo.sql",
"jupyterNb.ipynb", "py1.ipynb",
"jupyterNb2.ipynb",
"pyNb.py", "pyNb.py",
"r1.ipynb",
"rNb.r", "rNb.r",
"scala1.ipynb",
"scalaNb.scala", "scalaNb.scala",
"sql1.ipynb",
"sqlNb.sql", "sqlNb.sql",
}, names) }, names)
@ -582,7 +555,10 @@ func setupFilerWithExtensionsTest(t *testing.T) filer.Filer {
}{ }{
{"foo.py", "# Databricks notebook source\nprint('first upload'))"}, {"foo.py", "# Databricks notebook source\nprint('first upload'))"},
{"bar.py", "print('foo')"}, {"bar.py", "print('foo')"},
{"jupyter.ipynb", jupyterNotebookContent1}, {"p1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")},
{"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")},
{"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")},
{"sql1.ipynb", readFile(t, "testdata/notebooks/sql1.ipynb")},
{"pretender", "not a notebook"}, {"pretender", "not a notebook"},
{"dir/file.txt", "file content"}, {"dir/file.txt", "file content"},
{"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"}, {"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"},
@ -608,11 +584,15 @@ func TestAccFilerWorkspaceFilesExtensionsRead(t *testing.T) {
// Read contents of test fixtures as a sanity check. // Read contents of test fixtures as a sanity check.
filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('first upload'))") filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('first upload'))")
filerTest{t, wf}.assertContents(ctx, "bar.py", "print('foo')") filerTest{t, wf}.assertContents(ctx, "bar.py", "print('foo')")
filerTest{t, wf}.assertContentsJupyter(ctx, "jupyter.ipynb")
filerTest{t, wf}.assertContents(ctx, "dir/file.txt", "file content") filerTest{t, wf}.assertContents(ctx, "dir/file.txt", "file content")
filerTest{t, wf}.assertContents(ctx, "scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')") filerTest{t, wf}.assertContents(ctx, "scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')")
filerTest{t, wf}.assertContents(ctx, "pretender", "not a notebook") filerTest{t, wf}.assertContents(ctx, "pretender", "not a notebook")
filerTest{t, wf}.assertContentsJupyter(ctx, "p1.ipynb", "python")
filerTest{t, wf}.assertContentsJupyter(ctx, "r1.ipynb", "r")
filerTest{t, wf}.assertContentsJupyter(ctx, "scala1.ipynb", "scala")
filerTest{t, wf}.assertContentsJupyter(ctx, "sql1.ipynb", "sql")
// Read non-existent file // Read non-existent file
_, err := wf.Read(ctx, "non-existent.py") _, err := wf.Read(ctx, "non-existent.py")
assert.ErrorIs(t, err, fs.ErrNotExist) assert.ErrorIs(t, err, fs.ErrNotExist)
@ -638,35 +618,41 @@ func TestAccFilerWorkspaceFilesExtensionsDelete(t *testing.T) {
ctx := context.Background() ctx := context.Background()
wf := setupFilerWithExtensionsTest(t) wf := setupFilerWithExtensionsTest(t)
// Delete notebook for _, fileName := range []string{
err := wf.Delete(ctx, "foo.py") // notebook
"foo.py",
// file
"bar.py",
// python jupyter notebook
"p1.ipynb",
// R jupyter notebook
"r1.ipynb",
// Scala jupyter notebook
"scala1.ipynb",
// SQL jupyter notebook
"sql1.ipynb",
} {
err := wf.Delete(ctx, fileName)
require.NoError(t, err) require.NoError(t, err)
filerTest{t, wf}.assertNotExists(ctx, "foo.py") filerTest{t, wf}.assertNotExists(ctx, fileName)
}
// Delete file for _, fileName := range []string{
err = wf.Delete(ctx, "bar.py") // do not delete non-existent file
require.NoError(t, err) "non-existent.py",
filerTest{t, wf}.assertNotExists(ctx, "bar.py") // do not delete a file assuming it is a notebook and stripping the extension
"pretender.py",
// Delete jupyter notebook // do not delete a Scala notebook as a Python notebook
err = wf.Delete(ctx, "jupyter.ipynb") "scala-notebook.py",
require.NoError(t, err) // do not delete a file assuming it is a Jupyter notebook and stripping the extension
filerTest{t, wf}.assertNotExists(ctx, "jupyter.ipynb") "pretender.ipynb",
} {
// Delete non-existent file err := wf.Delete(ctx, fileName)
err = wf.Delete(ctx, "non-existent.py")
assert.ErrorIs(t, err, fs.ErrNotExist)
// Ensure we do not delete a file as a notebook
err = wf.Delete(ctx, "pretender.py")
assert.ErrorIs(t, err, fs.ErrNotExist)
// Ensure we do not delete a Scala notebook as a Python notebook
_, err = wf.Read(ctx, "scala-notebook.py")
assert.ErrorIs(t, err, fs.ErrNotExist) assert.ErrorIs(t, err, fs.ErrNotExist)
}
// Delete directory // Delete directory
err = wf.Delete(ctx, "dir") err := wf.Delete(ctx, "dir")
assert.ErrorIs(t, err, fs.ErrInvalid) assert.ErrorIs(t, err, fs.ErrInvalid)
// Delete directory recursively // Delete directory recursively
@ -681,45 +667,46 @@ func TestAccFilerWorkspaceFilesExtensionsStat(t *testing.T) {
ctx := context.Background() ctx := context.Background()
wf := setupFilerWithExtensionsTest(t) wf := setupFilerWithExtensionsTest(t)
// Stat on a notebook for _, fileName := range []string{
info, err := wf.Stat(ctx, "foo.py") // notebook
"foo.py",
// file
"bar.py",
// python jupyter notebook
"p1.ipynb",
// R jupyter notebook
"r1.ipynb",
// Scala jupyter notebook
"scala1.ipynb",
// SQL jupyter notebook
"sql1.ipynb",
} {
info, err := wf.Stat(ctx, fileName)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, "foo.py", info.Name()) assert.Equal(t, fileName, info.Name())
assert.False(t, info.IsDir())
// Stat on a file
info, err = wf.Stat(ctx, "bar.py")
require.NoError(t, err)
assert.Equal(t, "bar.py", info.Name())
assert.False(t, info.IsDir())
// Stat on a Jupyter notebook
info, err = wf.Stat(ctx, "jupyter.ipynb")
require.NoError(t, err)
assert.Equal(t, "jupyter.ipynb", info.Name())
assert.False(t, info.IsDir()) assert.False(t, info.IsDir())
}
// Stat on a directory // Stat on a directory
info, err = wf.Stat(ctx, "dir") info, err := wf.Stat(ctx, "dir")
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, "dir", info.Name()) assert.Equal(t, "dir", info.Name())
assert.True(t, info.IsDir()) assert.True(t, info.IsDir())
// Stat on a non-existent file for _, fileName := range []string{
_, err = wf.Stat(ctx, "non-existent.py") // non-existent file
assert.ErrorIs(t, err, fs.ErrNotExist) "non-existent.py",
// do not stat a file assuming it is a notebook and stripping the extension
// Ensure we do not stat a file as a notebook "pretender.py",
_, err = wf.Stat(ctx, "pretender.py") // do not stat a Scala notebook as a Python notebook
assert.ErrorIs(t, err, fs.ErrNotExist) "scala-notebook.py",
// do not read a regular file assuming it is a Jupyter notebook and stripping the extension
// Ensure we do not stat a Scala notebook as a Python notebook "pretender.ipynb",
_, err = wf.Stat(ctx, "scala-notebook.py") } {
assert.ErrorIs(t, err, fs.ErrNotExist) _, err := wf.Stat(ctx, fileName)
_, err = wf.Stat(ctx, "pretender.ipynb")
assert.ErrorIs(t, err, fs.ErrNotExist) assert.ErrorIs(t, err, fs.ErrNotExist)
} }
}
func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) { func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) {
t.Parallel() t.Parallel()
@ -739,32 +726,115 @@ func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) {
func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) {
t.Parallel() t.Parallel()
// Case 1: Writing source notebooks.
for _, tc := range []struct {
language string
sourceName string
sourceContent string
jupyterName string
jupyterContent string
}{
{
language: "python",
sourceName: "foo.py",
sourceContent: "# Databricks notebook source\nprint('foo')",
jupyterName: "foo.ipynb",
},
{
language: "r",
sourceName: "foo.r",
sourceContent: "# Databricks notebook source\nprint('foo')",
jupyterName: "foo.ipynb",
},
{
language: "scala",
sourceName: "foo.scala",
sourceContent: "// Databricks notebook source\nprintln('foo')",
jupyterName: "foo.ipynb",
},
{
language: "sql",
sourceName: "foo.sql",
sourceContent: "-- Databricks notebook source\nselect 'foo'",
jupyterName: "foo.ipynb",
},
} {
t.Run("source_"+tc.language, func(t *testing.T) {
t.Parallel()
ctx := context.Background() ctx := context.Background()
wf, _ := setupWsfsExtensionsFiler(t) wf, _ := setupWsfsExtensionsFiler(t)
// Case 1: Source Notebook err := wf.Write(ctx, tc.sourceName, strings.NewReader(tc.sourceContent))
err := wf.Write(ctx, "foo.py", strings.NewReader("# Databricks notebook source\nprint('foo')"))
require.NoError(t, err) require.NoError(t, err)
// The source notebook should exist but not the Jupyter notebook // Assert on the content of the source notebook that's been written.
filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('foo')") filerTest{t, wf}.assertContents(ctx, tc.sourceName, tc.sourceContent)
_, err = wf.Stat(ctx, "foo.ipynb")
assert.ErrorIs(t, err, fs.ErrNotExist)
_, err = wf.Read(ctx, "foo.ipynb")
assert.ErrorIs(t, err, fs.ErrNotExist)
err = wf.Delete(ctx, "foo.ipynb")
assert.ErrorIs(t, err, fs.ErrNotExist)
// Case 2: Jupyter Notebook // Ensure that the source notebook is not read when the name contains
err = wf.Write(ctx, "bar.ipynb", strings.NewReader(jupyterNotebookContent1)) // the .ipynb extension.
require.NoError(t, err) _, err = wf.Stat(ctx, tc.jupyterName)
// The Jupyter notebook should exist but not the source notebook
filerTest{t, wf}.assertContentsJupyter(ctx, "bar.ipynb")
_, err = wf.Stat(ctx, "bar.py")
assert.ErrorIs(t, err, fs.ErrNotExist) assert.ErrorIs(t, err, fs.ErrNotExist)
_, err = wf.Read(ctx, "bar.py") _, err = wf.Read(ctx, tc.jupyterName)
assert.ErrorIs(t, err, fs.ErrNotExist) assert.ErrorIs(t, err, fs.ErrNotExist)
err = wf.Delete(ctx, "bar.py") err = wf.Delete(ctx, tc.jupyterName)
assert.ErrorIs(t, err, fs.ErrNotExist) assert.ErrorIs(t, err, fs.ErrNotExist)
})
}
// Case 2: Writing Jupyter notebooks.
for _, tc := range []struct {
language string
sourceName string
jupyterName string
jupyterContent string
}{
{
language: "python",
sourceName: "foo.py",
jupyterName: "foo.ipynb",
jupyterContent: readFile(t, "testdata/notebooks/py1.ipynb"),
},
{
language: "r",
sourceName: "foo.r",
jupyterName: "foo.ipynb",
jupyterContent: readFile(t, "testdata/notebooks/r1.ipynb"),
},
{
language: "scala",
sourceName: "foo.scala",
jupyterName: "foo.ipynb",
jupyterContent: readFile(t, "testdata/notebooks/scala1.ipynb"),
},
{
language: "sql",
sourceName: "foo.sql",
jupyterName: "foo.ipynb",
jupyterContent: readFile(t, "testdata/notebooks/sql1.ipynb"),
},
} {
t.Run("jupyter_"+tc.language, func(t *testing.T) {
t.Parallel()
ctx := context.Background()
wf, _ := setupWsfsExtensionsFiler(t)
err := wf.Write(ctx, tc.jupyterName, strings.NewReader(tc.jupyterContent))
require.NoError(t, err)
// Assert that the written notebook is jupyter and has the correct
// language_info metadata set.
filerTest{t, wf}.assertContentsJupyter(ctx, tc.jupyterName, tc.language)
// Ensure that the Jupyter notebook is not read when the name does not
// contain the .ipynb extension.
_, err = wf.Stat(ctx, tc.sourceName)
assert.ErrorIs(t, err, fs.ErrNotExist)
_, err = wf.Read(ctx, tc.sourceName)
assert.ErrorIs(t, err, fs.ErrNotExist)
err = wf.Delete(ctx, tc.sourceName)
assert.ErrorIs(t, err, fs.ErrNotExist)
})
}
} }

View File

@ -352,6 +352,13 @@ func RequireErrorRun(t *testing.T, args ...string) (bytes.Buffer, bytes.Buffer,
return stdout, stderr, err return stdout, stderr, err
} }
func readFile(t *testing.T, name string) string {
b, err := os.ReadFile(name)
require.NoError(t, err)
return string(b)
}
func writeFile(t *testing.T, name string, body string) string { func writeFile(t *testing.T, name string, body string) string {
f, err := os.Create(filepath.Join(t.TempDir(), name)) f, err := os.Create(filepath.Join(t.TempDir(), name))
require.NoError(t, err) require.NoError(t, err)
@ -562,12 +569,10 @@ func setupLocalFiler(t *testing.T) (filer.Filer, string) {
} }
func setupWsfsFiler(t *testing.T) (filer.Filer, string) { func setupWsfsFiler(t *testing.T) (filer.Filer, string) {
t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) ctx, wt := acc.WorkspaceTest(t)
ctx := context.Background() tmpdir := TemporaryWorkspaceDir(t, wt.W)
w := databricks.Must(databricks.NewWorkspaceClient()) f, err := filer.NewWorkspaceFilesClient(wt.W, tmpdir)
tmpdir := TemporaryWorkspaceDir(t, w)
f, err := filer.NewWorkspaceFilesClient(w, tmpdir)
require.NoError(t, err) require.NoError(t, err)
// Check if we can use this API here, skip test if we cannot. // Check if we can use this API here, skip test if we cannot.
@ -581,11 +586,10 @@ func setupWsfsFiler(t *testing.T) (filer.Filer, string) {
} }
func setupWsfsExtensionsFiler(t *testing.T) (filer.Filer, string) { func setupWsfsExtensionsFiler(t *testing.T) (filer.Filer, string) {
t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) _, wt := acc.WorkspaceTest(t)
w := databricks.Must(databricks.NewWorkspaceClient()) tmpdir := TemporaryWorkspaceDir(t, wt.W)
tmpdir := TemporaryWorkspaceDir(t, w) f, err := filer.NewWorkspaceFilesExtensionsClient(wt.W, tmpdir)
f, err := filer.NewWorkspaceFilesExtensionsClient(w, tmpdir)
require.NoError(t, err) require.NoError(t, err)
return f, tmpdir return f, tmpdir

27
internal/testdata/notebooks/py1.ipynb vendored Normal file
View File

@ -0,0 +1,27 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.8.13"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

27
internal/testdata/notebooks/py2.ipynb vendored Normal file
View File

@ -0,0 +1,27 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(2)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.8.13"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

25
internal/testdata/notebooks/r1.ipynb vendored Normal file
View File

@ -0,0 +1,25 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"name": "R"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

29
internal/testdata/notebooks/r2.ipynb vendored Normal file
View File

@ -0,0 +1,29 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "r"
}
},
"outputs": [],
"source": [
"print(2)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "R",
"language": "R",
"name": "ir"
},
"language_info": {
"name": "R"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,38 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n"
]
}
],
"source": [
"println(1)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Scala",
"language": "scala",
"name": "scala"
},
"language_info": {
"codemirror_mode": "text/x-scala",
"file_extension": ".sc",
"mimetype": "text/x-scala",
"name": "scala",
"nbconvert_exporter": "script",
"version": "2.13.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -0,0 +1,38 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1\n"
]
}
],
"source": [
"println(2)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Scala",
"language": "scala",
"name": "scala"
},
"language_info": {
"codemirror_mode": "text/x-scala",
"file_extension": ".sc",
"mimetype": "text/x-scala",
"name": "scala",
"nbconvert_exporter": "script",
"version": "2.13.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

20
internal/testdata/notebooks/sql1.ipynb vendored Normal file
View File

@ -0,0 +1,20 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"select 1"
]
}
],
"metadata": {
"language_info": {
"name": "sql"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

20
internal/testdata/notebooks/sql2.ipynb vendored Normal file
View File

@ -0,0 +1,20 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"select 2"
]
}
],
"metadata": {
"language_info": {
"name": "sql"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -7,6 +7,7 @@ import (
"io" "io"
"io/fs" "io/fs"
"path" "path"
"slices"
"strings" "strings"
"github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/log"
@ -23,14 +24,6 @@ type workspaceFilesExtensionsClient struct {
readonly bool readonly bool
} }
var extensionsToLanguages = map[string]workspace.Language{
".py": workspace.LanguagePython,
".r": workspace.LanguageR,
".scala": workspace.LanguageScala,
".sql": workspace.LanguageSql,
".ipynb": workspace.LanguagePython,
}
type workspaceFileStatus struct { type workspaceFileStatus struct {
wsfsFileInfo wsfsFileInfo
@ -54,7 +47,12 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex
nameWithoutExt := strings.TrimSuffix(name, ext) nameWithoutExt := strings.TrimSuffix(name, ext)
// File name does not have an extension associated with Databricks notebooks, return early. // File name does not have an extension associated with Databricks notebooks, return early.
if _, ok := extensionsToLanguages[ext]; !ok { if !slices.Contains([]string{
notebook.ExtensionPython,
notebook.ExtensionR,
notebook.ExtensionScala,
notebook.ExtensionSql,
notebook.ExtensionJupyter}, ext) {
return nil, nil return nil, nil
} }
@ -75,22 +73,23 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex
return nil, nil return nil, nil
} }
// Not the correct language. Return early. // Not the correct language. Return early. Note: All languages are supported
if stat.Language != extensionsToLanguages[ext] { // for Jupyter notebooks.
log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), extensionsToLanguages[ext], stat.Language) if ext != notebook.ExtensionJupyter && stat.Language != notebook.ExtensionToLanguage[ext] {
log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), notebook.ExtensionToLanguage[ext], stat.Language)
return nil, nil return nil, nil
} }
// When the extension is .py we expect the export format to be source. // For non-jupyter notebooks the export format should be source.
// If it's not, return early. // If it's not, return early.
if ext == ".py" && stat.ReposExportFormat != workspace.ExportFormatSource { if ext != notebook.ExtensionJupyter && stat.ReposExportFormat != workspace.ExportFormatSource {
log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a source notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat) log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a source notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat)
return nil, nil return nil, nil
} }
// When the extension is .ipynb we expect the export format to be Jupyter. // When the extension is .ipynb we expect the export format to be Jupyter.
// If it's not, return early. // If it's not, return early.
if ext == ".ipynb" && stat.ReposExportFormat != workspace.ExportFormatJupyter { if ext == notebook.ExtensionJupyter && stat.ReposExportFormat != workspace.ExportFormatJupyter {
log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a Jupyter notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat) log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a Jupyter notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat)
return nil, nil return nil, nil
} }
@ -120,8 +119,8 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx con
ext := notebook.GetExtensionByLanguage(&stat.ObjectInfo) ext := notebook.GetExtensionByLanguage(&stat.ObjectInfo)
// If the notebook was exported as a Jupyter notebook, the extension should be .ipynb. // If the notebook was exported as a Jupyter notebook, the extension should be .ipynb.
if stat.Language == workspace.LanguagePython && stat.ReposExportFormat == workspace.ExportFormatJupyter { if stat.ReposExportFormat == workspace.ExportFormatJupyter {
ext = ".ipynb" ext = notebook.ExtensionJupyter
} }
// Modify the stat object path to include the extension. This stat object will be used // Modify the stat object path to include the extension. This stat object will be used

View File

@ -37,7 +37,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) {
expectedError string expectedError string
}{ }{
{ {
name: "python source notebook and file", name: "python source notebook and file with source extension",
language: workspace.LanguagePython, language: workspace.LanguagePython,
notebookExportFormat: workspace.ExportFormatSource, notebookExportFormat: workspace.ExportFormatSource,
notebookPath: "/dir/foo", notebookPath: "/dir/foo",
@ -45,7 +45,31 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) {
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue", expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue",
}, },
{ {
name: "python jupyter notebook and file", name: "scala source notebook and file with source extension",
language: workspace.LanguageScala,
notebookExportFormat: workspace.ExportFormatSource,
notebookPath: "/dir/foo",
filePath: "/dir/foo.scala",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.scala resolve to the same name /foo.scala. Changing the name of one of these objects will resolve this issue",
},
{
name: "r source notebook and file with source extension",
language: workspace.LanguageR,
notebookExportFormat: workspace.ExportFormatSource,
notebookPath: "/dir/foo",
filePath: "/dir/foo.r",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.r resolve to the same name /foo.r. Changing the name of one of these objects will resolve this issue",
},
{
name: "sql source notebook and file with source extension",
language: workspace.LanguageSql,
notebookExportFormat: workspace.ExportFormatSource,
notebookPath: "/dir/foo",
filePath: "/dir/foo.sql",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.sql resolve to the same name /foo.sql. Changing the name of one of these objects will resolve this issue",
},
{
name: "python jupyter notebook and file with source extension",
language: workspace.LanguagePython, language: workspace.LanguagePython,
notebookExportFormat: workspace.ExportFormatJupyter, notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo", notebookPath: "/dir/foo",
@ -54,37 +78,64 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) {
expectedError: "", expectedError: "",
}, },
{ {
name: "scala source notebook and file", name: "scala jupyter notebook and file with source extension",
language: workspace.LanguageScala, language: workspace.LanguageScala,
notebookExportFormat: workspace.ExportFormatSource, notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo", notebookPath: "/dir/foo",
filePath: "/dir/foo.scala", filePath: "/dir/foo.scala",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.scala resolve to the same name /foo.scala. Changing the name of one of these objects will resolve this issue", // Jupyter notebooks would correspond to foo.ipynb so an error is not expected.
expectedError: "",
}, },
{ {
name: "r source notebook and file", name: "sql jupyter notebook and file with source extension",
language: workspace.LanguageR,
notebookExportFormat: workspace.ExportFormatSource,
notebookPath: "/dir/foo",
filePath: "/dir/foo.r",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.r resolve to the same name /foo.r. Changing the name of one of these objects will resolve this issue",
},
{
name: "sql source notebook and file",
language: workspace.LanguageSql, language: workspace.LanguageSql,
notebookExportFormat: workspace.ExportFormatSource, notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo", notebookPath: "/dir/foo",
filePath: "/dir/foo.sql", filePath: "/dir/foo.sql",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.sql resolve to the same name /foo.sql. Changing the name of one of these objects will resolve this issue", // Jupyter notebooks would correspond to foo.ipynb so an error is not expected.
expectedError: "",
}, },
{ {
name: "python jupyter notebook and file", name: "r jupyter notebook and file with source extension",
language: workspace.LanguageR,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.sql",
// Jupyter notebooks would correspond to foo.ipynb so an error is not expected.
expectedError: "",
},
{
name: "python jupyter notebook and file with .ipynb extension",
language: workspace.LanguagePython, language: workspace.LanguagePython,
notebookExportFormat: workspace.ExportFormatJupyter, notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo", notebookPath: "/dir/foo",
filePath: "/dir/foo.ipynb", filePath: "/dir/foo.ipynb",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
}, },
{
name: "scala jupyter notebook and file with .ipynb extension",
language: workspace.LanguageScala,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.ipynb",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
},
{
name: "r jupyter notebook and file with .ipynb extension",
language: workspace.LanguageR,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.ipynb",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
},
{
name: "sql jupyter notebook and file with .ipynb extension",
language: workspace.LanguageSql,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.ipynb",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
},
} { } {
t.Run(tc.name, func(t *testing.T) { t.Run(tc.name, func(t *testing.T) {
mockedWorkspaceClient := mocks.NewMockWorkspaceClient(t) mockedWorkspaceClient := mocks.NewMockWorkspaceClient(t)

View File

@ -107,19 +107,19 @@ func DetectWithFS(fsys fs.FS, name string) (notebook bool, language workspace.La
// Determine which header to expect based on filename extension. // Determine which header to expect based on filename extension.
ext := strings.ToLower(filepath.Ext(name)) ext := strings.ToLower(filepath.Ext(name))
switch ext { switch ext {
case ".py": case ExtensionPython:
header = `# Databricks notebook source` header = `# Databricks notebook source`
language = workspace.LanguagePython language = workspace.LanguagePython
case ".r": case ExtensionR:
header = `# Databricks notebook source` header = `# Databricks notebook source`
language = workspace.LanguageR language = workspace.LanguageR
case ".scala": case ExtensionScala:
header = "// Databricks notebook source" header = "// Databricks notebook source"
language = workspace.LanguageScala language = workspace.LanguageScala
case ".sql": case ExtensionSql:
header = "-- Databricks notebook source" header = "-- Databricks notebook source"
language = workspace.LanguageSql language = workspace.LanguageSql
case ".ipynb": case ExtensionJupyter:
return DetectJupyterWithFS(fsys, name) return DetectJupyterWithFS(fsys, name)
default: default:
return false, "", nil return false, "", nil

View File

@ -2,22 +2,40 @@ package notebook
import "github.com/databricks/databricks-sdk-go/service/workspace" import "github.com/databricks/databricks-sdk-go/service/workspace"
// File extensions the CLI associates with Databricks notebooks when mapping
// between workspace object names and local file names.
const (
	// ExtensionNone marks objects that should not carry an extension.
	ExtensionNone    string = ""
	ExtensionPython  string = ".py"
	ExtensionR       string = ".r"
	ExtensionScala   string = ".scala"
	ExtensionSql     string = ".sql"
	ExtensionJupyter string = ".ipynb"
)
// ExtensionToLanguage maps a source-notebook file extension to the single
// workspace language that extension implies.
var ExtensionToLanguage = map[string]workspace.Language{
	ExtensionPython: workspace.LanguagePython,
	ExtensionR:      workspace.LanguageR,
	ExtensionScala:  workspace.LanguageScala,
	ExtensionSql:    workspace.LanguageSql,
	// ExtensionJupyter (.ipynb) is deliberately absent: the platform supports
	// all languages (Python, R, Scala, and SQL) for Jupyter notebooks, so no
	// single language can be inferred from that extension.
}
func GetExtensionByLanguage(objectInfo *workspace.ObjectInfo) string { func GetExtensionByLanguage(objectInfo *workspace.ObjectInfo) string {
if objectInfo.ObjectType != workspace.ObjectTypeNotebook { if objectInfo.ObjectType != workspace.ObjectTypeNotebook {
return "" return ExtensionNone
} }
switch objectInfo.Language { switch objectInfo.Language {
case workspace.LanguagePython: case workspace.LanguagePython:
return ".py" return ExtensionPython
case workspace.LanguageR: case workspace.LanguageR:
return ".r" return ExtensionR
case workspace.LanguageScala: case workspace.LanguageScala:
return ".scala" return ExtensionScala
case workspace.LanguageSql: case workspace.LanguageSql:
return ".sql" return ExtensionSql
default: default:
// Do not add any extension to the file name // Do not add any extension to the file name
return "" return ExtensionNone
} }
} }