mirror of https://github.com/databricks/cli.git
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes ### Background The workspace import APIs recently added support for importing Jupyter notebooks written in R, Scala, or SQL — that is, non-Python notebooks. This now works for the `/import-file` API, which we leverage in the CLI. Note: We do not need any changes in `databricks sync`. It works out of the box because any state mapping of local names to remote names that we store is only scoped to the notebook extension (i.e., `.ipynb` in this case) and is agnostic of the notebook's specific language. ### Problem this PR addresses The extension-aware filer previously did not function because it checks that a `.ipynb` notebook is written in Python. This PR relaxes that constraint and adds integration tests for both the normal workspace filer and the extension-aware filer writing and reading non-Python `.ipynb` notebooks. This implies that after this PR, DABs in the workspace / CLI from DBR will work for non-Python notebooks as well. Non-Python notebooks for DABs deployment from local machines already worked after the platform-side changes to the API landed; this PR just adds integration tests for that bit of functionality. Note: Any platform-side changes we needed for the import API have already been rolled out to production. ### Before DABs deploys would work fine for non-Python notebooks, but DABs deployments from DBR would not. ### After DABs deploys both from local machines and from DBR will work fine. ## Testing For creating the `.ipynb` notebook fixtures used in the integration tests I created them directly from the VSCode UI. This ensures high fidelity with how users will create their non-Python notebooks locally. For Python notebooks this is supported out of the box by VSCode, but for R and Scala notebooks this requires installing the Jupyter kernel for R and Scala on my local machine and using that from VSCode. For SQL, I ended up directly modifying the `language_info` field in the Jupyter metadata to create the test fixture. 
### Discussion: Issues with configuring language at the cell level The language metadata for a Jupyter notebook is standardized at the notebook level (in the `language_info` field). Unfortunately, it's not standardized at the cell level. Thus, for example, if a user changes the language for their cell in VSCode (which is supported by the standard Jupyter VSCode integration), it'll cause a runtime error when the user actually attempts to run the notebook. This is because the cell-level metadata is encoded in a format specific to VSCode: ``` cells: []{ "vscode": { "languageId": "sql" } } ``` Supporting cell level languages is thus out of scope for this PR and can be revisited along with the workspace files team if there's strong customer interest.
This commit is contained in:
parent
25838ee0af
commit
e1978fa429
|
@ -39,7 +39,7 @@ func (f filerTest) assertContents(ctx context.Context, name string, contents str
|
||||||
assert.Equal(f, contents, body.String())
|
assert.Equal(f, contents, body.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f filerTest) assertContentsJupyter(ctx context.Context, name string) {
|
func (f filerTest) assertContentsJupyter(ctx context.Context, name string, language string) {
|
||||||
reader, err := f.Read(ctx, name)
|
reader, err := f.Read(ctx, name)
|
||||||
if !assert.NoError(f, err) {
|
if !assert.NoError(f, err) {
|
||||||
return
|
return
|
||||||
|
@ -62,6 +62,7 @@ func (f filerTest) assertContentsJupyter(ctx context.Context, name string) {
|
||||||
// Since a roundtrip to the workspace changes a Jupyter notebook's payload,
|
// Since a roundtrip to the workspace changes a Jupyter notebook's payload,
|
||||||
// the best we can do is assert that the nbformat is correct.
|
// the best we can do is assert that the nbformat is correct.
|
||||||
assert.EqualValues(f, 4, actual["nbformat"])
|
assert.EqualValues(f, 4, actual["nbformat"])
|
||||||
|
assert.Equal(f, language, actual["metadata"].(map[string]any)["language_info"].(map[string]any)["name"])
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f filerTest) assertNotExists(ctx context.Context, name string) {
|
func (f filerTest) assertNotExists(ctx context.Context, name string) {
|
||||||
|
@ -360,146 +361,114 @@ func TestAccFilerReadDir(t *testing.T) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var jupyterNotebookContent1 = `
|
func TestAccFilerWorkspaceNotebook(t *testing.T) {
|
||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print(\"Jupyter Notebook Version 1\")"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"language_info": {
|
|
||||||
"name": "python"
|
|
||||||
},
|
|
||||||
"orig_nbformat": 4
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
`
|
|
||||||
|
|
||||||
var jupyterNotebookContent2 = `
|
|
||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"print(\"Jupyter Notebook Version 2\")"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"language_info": {
|
|
||||||
"name": "python"
|
|
||||||
},
|
|
||||||
"orig_nbformat": 4
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 2
|
|
||||||
}
|
|
||||||
`
|
|
||||||
|
|
||||||
func TestAccFilerWorkspaceNotebookConflict(t *testing.T) {
|
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
f, _ := setupWsfsFiler(t)
|
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
// Upload the notebooks
|
tcases := []struct {
|
||||||
err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('first upload'))"))
|
name string
|
||||||
require.NoError(t, err)
|
nameWithoutExt string
|
||||||
err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('first upload'))"))
|
content1 string
|
||||||
require.NoError(t, err)
|
expected1 string
|
||||||
err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"first upload\""))
|
content2 string
|
||||||
require.NoError(t, err)
|
expected2 string
|
||||||
err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))"))
|
}{
|
||||||
require.NoError(t, err)
|
{
|
||||||
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1))
|
name: "pyNb.py",
|
||||||
|
nameWithoutExt: "pyNb",
|
||||||
|
content1: "# Databricks notebook source\nprint('first upload')",
|
||||||
|
expected1: "# Databricks notebook source\nprint('first upload')",
|
||||||
|
content2: "# Databricks notebook source\nprint('second upload')",
|
||||||
|
expected2: "# Databricks notebook source\nprint('second upload')",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "rNb.r",
|
||||||
|
nameWithoutExt: "rNb",
|
||||||
|
content1: "# Databricks notebook source\nprint('first upload')",
|
||||||
|
expected1: "# Databricks notebook source\nprint('first upload')",
|
||||||
|
content2: "# Databricks notebook source\nprint('second upload')",
|
||||||
|
expected2: "# Databricks notebook source\nprint('second upload')",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "sqlNb.sql",
|
||||||
|
nameWithoutExt: "sqlNb",
|
||||||
|
content1: "-- Databricks notebook source\n SELECT \"first upload\"",
|
||||||
|
expected1: "-- Databricks notebook source\n SELECT \"first upload\"",
|
||||||
|
content2: "-- Databricks notebook source\n SELECT \"second upload\"",
|
||||||
|
expected2: "-- Databricks notebook source\n SELECT \"second upload\"",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "scalaNb.scala",
|
||||||
|
nameWithoutExt: "scalaNb",
|
||||||
|
content1: "// Databricks notebook source\n println(\"first upload\")",
|
||||||
|
expected1: "// Databricks notebook source\n println(\"first upload\")",
|
||||||
|
content2: "// Databricks notebook source\n println(\"second upload\")",
|
||||||
|
expected2: "// Databricks notebook source\n println(\"second upload\")",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "pythonJupyterNb.ipynb",
|
||||||
|
nameWithoutExt: "pythonJupyterNb",
|
||||||
|
content1: readFile(t, "testdata/notebooks/py1.ipynb"),
|
||||||
|
expected1: "# Databricks notebook source\nprint(1)",
|
||||||
|
content2: readFile(t, "testdata/notebooks/py2.ipynb"),
|
||||||
|
expected2: "# Databricks notebook source\nprint(2)",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "rJupyterNb.ipynb",
|
||||||
|
nameWithoutExt: "rJupyterNb",
|
||||||
|
content1: readFile(t, "testdata/notebooks/r1.ipynb"),
|
||||||
|
expected1: "# Databricks notebook source\nprint(1)",
|
||||||
|
content2: readFile(t, "testdata/notebooks/r2.ipynb"),
|
||||||
|
expected2: "# Databricks notebook source\nprint(2)",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "scalaJupyterNb.ipynb",
|
||||||
|
nameWithoutExt: "scalaJupyterNb",
|
||||||
|
content1: readFile(t, "testdata/notebooks/scala1.ipynb"),
|
||||||
|
expected1: "// Databricks notebook source\nprintln(1)",
|
||||||
|
content2: readFile(t, "testdata/notebooks/scala2.ipynb"),
|
||||||
|
expected2: "// Databricks notebook source\nprintln(2)",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "sqlJupyterNotebook.ipynb",
|
||||||
|
nameWithoutExt: "sqlJupyterNotebook",
|
||||||
|
content1: readFile(t, "testdata/notebooks/sql1.ipynb"),
|
||||||
|
expected1: "-- Databricks notebook source\nselect 1",
|
||||||
|
content2: readFile(t, "testdata/notebooks/sql2.ipynb"),
|
||||||
|
expected2: "-- Databricks notebook source\nselect 2",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tcases {
|
||||||
|
f, _ := setupWsfsFiler(t)
|
||||||
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
// Upload the notebook
|
||||||
|
err = f.Write(ctx, tc.name, strings.NewReader(tc.content1))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
// Assert contents after initial upload
|
// Assert contents after initial upload. Note that we expect the content
|
||||||
filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('first upload'))")
|
// for jupyter notebooks to be of type source because the workspace files
|
||||||
filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))")
|
// client always uses the source format to read notebooks from the workspace.
|
||||||
filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"")
|
filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected1)
|
||||||
filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))")
|
|
||||||
filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")")
|
|
||||||
|
|
||||||
// Assert uploading a second time fails due to overwrite mode missing
|
// Assert uploading a second time fails due to overwrite mode missing
|
||||||
err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))"))
|
err = f.Write(ctx, tc.name, strings.NewReader(tc.content2))
|
||||||
assert.ErrorIs(t, err, fs.ErrExist)
|
assert.ErrorIs(t, err, fs.ErrExist)
|
||||||
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/pyNb$`), err.Error())
|
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/`+tc.nameWithoutExt+`$`), err.Error())
|
||||||
|
|
||||||
err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('second upload'))"))
|
// Try uploading the notebook again with overwrite flag. This time it should succeed.
|
||||||
assert.ErrorIs(t, err, fs.ErrExist)
|
err = f.Write(ctx, tc.name, strings.NewReader(tc.content2), filer.OverwriteIfExists)
|
||||||
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/rNb$`), err.Error())
|
|
||||||
|
|
||||||
err = f.Write(ctx, "sqlNb.sql", strings.NewReader("# Databricks notebook source\n SELECT \"second upload\")"))
|
|
||||||
assert.ErrorIs(t, err, fs.ErrExist)
|
|
||||||
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/sqlNb$`), err.Error())
|
|
||||||
|
|
||||||
err = f.Write(ctx, "scalaNb.scala", strings.NewReader("# Databricks notebook source\n println(\"second upload\"))"))
|
|
||||||
assert.ErrorIs(t, err, fs.ErrExist)
|
|
||||||
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/scalaNb$`), err.Error())
|
|
||||||
|
|
||||||
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2))
|
|
||||||
assert.ErrorIs(t, err, fs.ErrExist)
|
|
||||||
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/jupyterNb$`), err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) {
|
|
||||||
t.Parallel()
|
|
||||||
|
|
||||||
f, _ := setupWsfsFiler(t)
|
|
||||||
ctx := context.Background()
|
|
||||||
var err error
|
|
||||||
|
|
||||||
// Upload notebooks
|
|
||||||
err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('first upload'))"))
|
|
||||||
require.NoError(t, err)
|
|
||||||
err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('first upload'))"))
|
|
||||||
require.NoError(t, err)
|
|
||||||
err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"first upload\""))
|
|
||||||
require.NoError(t, err)
|
|
||||||
err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))"))
|
|
||||||
require.NoError(t, err)
|
|
||||||
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1))
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
// Assert contents after initial upload
|
// Assert contents after second upload
|
||||||
filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('first upload'))")
|
filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected2)
|
||||||
filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))")
|
})
|
||||||
filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"")
|
}
|
||||||
filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))")
|
|
||||||
filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")")
|
|
||||||
|
|
||||||
// Upload notebooks a second time, overwriting the initial uplaods
|
|
||||||
err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))"), filer.OverwriteIfExists)
|
|
||||||
require.NoError(t, err)
|
|
||||||
err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('second upload'))"), filer.OverwriteIfExists)
|
|
||||||
require.NoError(t, err)
|
|
||||||
err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"second upload\""), filer.OverwriteIfExists)
|
|
||||||
require.NoError(t, err)
|
|
||||||
err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"second upload\"))"), filer.OverwriteIfExists)
|
|
||||||
require.NoError(t, err)
|
|
||||||
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2), filer.OverwriteIfExists)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
// Assert contents have been overwritten
|
|
||||||
filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('second upload'))")
|
|
||||||
filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('second upload'))")
|
|
||||||
filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"second upload\"")
|
|
||||||
filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"second upload\"))")
|
|
||||||
filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 2\")")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) {
|
func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) {
|
||||||
|
@ -515,11 +484,13 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) {
|
||||||
{"foo.r", "print('foo')"},
|
{"foo.r", "print('foo')"},
|
||||||
{"foo.scala", "println('foo')"},
|
{"foo.scala", "println('foo')"},
|
||||||
{"foo.sql", "SELECT 'foo'"},
|
{"foo.sql", "SELECT 'foo'"},
|
||||||
{"jupyterNb.ipynb", jupyterNotebookContent1},
|
{"py1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")},
|
||||||
{"jupyterNb2.ipynb", jupyterNotebookContent2},
|
|
||||||
{"pyNb.py", "# Databricks notebook source\nprint('first upload'))"},
|
{"pyNb.py", "# Databricks notebook source\nprint('first upload'))"},
|
||||||
|
{"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")},
|
||||||
{"rNb.r", "# Databricks notebook source\nprint('first upload'))"},
|
{"rNb.r", "# Databricks notebook source\nprint('first upload'))"},
|
||||||
|
{"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")},
|
||||||
{"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"},
|
{"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"},
|
||||||
|
{"sql1.ipynb", readFile(t, "testdata/notebooks/sql1.ipynb")},
|
||||||
{"sqlNb.sql", "-- Databricks notebook source\n SELECT \"first upload\""},
|
{"sqlNb.sql", "-- Databricks notebook source\n SELECT \"first upload\""},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -554,11 +525,13 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) {
|
||||||
"foo.r",
|
"foo.r",
|
||||||
"foo.scala",
|
"foo.scala",
|
||||||
"foo.sql",
|
"foo.sql",
|
||||||
"jupyterNb.ipynb",
|
"py1.ipynb",
|
||||||
"jupyterNb2.ipynb",
|
|
||||||
"pyNb.py",
|
"pyNb.py",
|
||||||
|
"r1.ipynb",
|
||||||
"rNb.r",
|
"rNb.r",
|
||||||
|
"scala1.ipynb",
|
||||||
"scalaNb.scala",
|
"scalaNb.scala",
|
||||||
|
"sql1.ipynb",
|
||||||
"sqlNb.sql",
|
"sqlNb.sql",
|
||||||
}, names)
|
}, names)
|
||||||
|
|
||||||
|
@ -582,7 +555,10 @@ func setupFilerWithExtensionsTest(t *testing.T) filer.Filer {
|
||||||
}{
|
}{
|
||||||
{"foo.py", "# Databricks notebook source\nprint('first upload'))"},
|
{"foo.py", "# Databricks notebook source\nprint('first upload'))"},
|
||||||
{"bar.py", "print('foo')"},
|
{"bar.py", "print('foo')"},
|
||||||
{"jupyter.ipynb", jupyterNotebookContent1},
|
{"p1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")},
|
||||||
|
{"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")},
|
||||||
|
{"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")},
|
||||||
|
{"sql1.ipynb", readFile(t, "testdata/notebooks/sql1.ipynb")},
|
||||||
{"pretender", "not a notebook"},
|
{"pretender", "not a notebook"},
|
||||||
{"dir/file.txt", "file content"},
|
{"dir/file.txt", "file content"},
|
||||||
{"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"},
|
{"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"},
|
||||||
|
@ -608,11 +584,15 @@ func TestAccFilerWorkspaceFilesExtensionsRead(t *testing.T) {
|
||||||
// Read contents of test fixtures as a sanity check.
|
// Read contents of test fixtures as a sanity check.
|
||||||
filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('first upload'))")
|
filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('first upload'))")
|
||||||
filerTest{t, wf}.assertContents(ctx, "bar.py", "print('foo')")
|
filerTest{t, wf}.assertContents(ctx, "bar.py", "print('foo')")
|
||||||
filerTest{t, wf}.assertContentsJupyter(ctx, "jupyter.ipynb")
|
|
||||||
filerTest{t, wf}.assertContents(ctx, "dir/file.txt", "file content")
|
filerTest{t, wf}.assertContents(ctx, "dir/file.txt", "file content")
|
||||||
filerTest{t, wf}.assertContents(ctx, "scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')")
|
filerTest{t, wf}.assertContents(ctx, "scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')")
|
||||||
filerTest{t, wf}.assertContents(ctx, "pretender", "not a notebook")
|
filerTest{t, wf}.assertContents(ctx, "pretender", "not a notebook")
|
||||||
|
|
||||||
|
filerTest{t, wf}.assertContentsJupyter(ctx, "p1.ipynb", "python")
|
||||||
|
filerTest{t, wf}.assertContentsJupyter(ctx, "r1.ipynb", "r")
|
||||||
|
filerTest{t, wf}.assertContentsJupyter(ctx, "scala1.ipynb", "scala")
|
||||||
|
filerTest{t, wf}.assertContentsJupyter(ctx, "sql1.ipynb", "sql")
|
||||||
|
|
||||||
// Read non-existent file
|
// Read non-existent file
|
||||||
_, err := wf.Read(ctx, "non-existent.py")
|
_, err := wf.Read(ctx, "non-existent.py")
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
||||||
|
@ -638,35 +618,41 @@ func TestAccFilerWorkspaceFilesExtensionsDelete(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
wf := setupFilerWithExtensionsTest(t)
|
wf := setupFilerWithExtensionsTest(t)
|
||||||
|
|
||||||
// Delete notebook
|
for _, fileName := range []string{
|
||||||
err := wf.Delete(ctx, "foo.py")
|
// notebook
|
||||||
|
"foo.py",
|
||||||
|
// file
|
||||||
|
"bar.py",
|
||||||
|
// python jupyter notebook
|
||||||
|
"p1.ipynb",
|
||||||
|
// R jupyter notebook
|
||||||
|
"r1.ipynb",
|
||||||
|
// Scala jupyter notebook
|
||||||
|
"scala1.ipynb",
|
||||||
|
// SQL jupyter notebook
|
||||||
|
"sql1.ipynb",
|
||||||
|
} {
|
||||||
|
err := wf.Delete(ctx, fileName)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
filerTest{t, wf}.assertNotExists(ctx, "foo.py")
|
filerTest{t, wf}.assertNotExists(ctx, fileName)
|
||||||
|
}
|
||||||
|
|
||||||
// Delete file
|
for _, fileName := range []string{
|
||||||
err = wf.Delete(ctx, "bar.py")
|
// do not delete non-existent file
|
||||||
require.NoError(t, err)
|
"non-existent.py",
|
||||||
filerTest{t, wf}.assertNotExists(ctx, "bar.py")
|
// do not delete a file assuming it is a notebook and stripping the extension
|
||||||
|
"pretender.py",
|
||||||
// Delete jupyter notebook
|
// do not delete a Scala notebook as a Python notebook
|
||||||
err = wf.Delete(ctx, "jupyter.ipynb")
|
"scala-notebook.py",
|
||||||
require.NoError(t, err)
|
// do not delete a file assuming it is a Jupyter notebook and stripping the extension
|
||||||
filerTest{t, wf}.assertNotExists(ctx, "jupyter.ipynb")
|
"pretender.ipynb",
|
||||||
|
} {
|
||||||
// Delete non-existent file
|
err := wf.Delete(ctx, fileName)
|
||||||
err = wf.Delete(ctx, "non-existent.py")
|
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
||||||
|
|
||||||
// Ensure we do not delete a file as a notebook
|
|
||||||
err = wf.Delete(ctx, "pretender.py")
|
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
||||||
|
|
||||||
// Ensure we do not delete a Scala notebook as a Python notebook
|
|
||||||
_, err = wf.Read(ctx, "scala-notebook.py")
|
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
||||||
|
}
|
||||||
|
|
||||||
// Delete directory
|
// Delete directory
|
||||||
err = wf.Delete(ctx, "dir")
|
err := wf.Delete(ctx, "dir")
|
||||||
assert.ErrorIs(t, err, fs.ErrInvalid)
|
assert.ErrorIs(t, err, fs.ErrInvalid)
|
||||||
|
|
||||||
// Delete directory recursively
|
// Delete directory recursively
|
||||||
|
@ -681,44 +667,45 @@ func TestAccFilerWorkspaceFilesExtensionsStat(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
wf := setupFilerWithExtensionsTest(t)
|
wf := setupFilerWithExtensionsTest(t)
|
||||||
|
|
||||||
// Stat on a notebook
|
for _, fileName := range []string{
|
||||||
info, err := wf.Stat(ctx, "foo.py")
|
// notebook
|
||||||
|
"foo.py",
|
||||||
|
// file
|
||||||
|
"bar.py",
|
||||||
|
// python jupyter notebook
|
||||||
|
"p1.ipynb",
|
||||||
|
// R jupyter notebook
|
||||||
|
"r1.ipynb",
|
||||||
|
// Scala jupyter notebook
|
||||||
|
"scala1.ipynb",
|
||||||
|
// SQL jupyter notebook
|
||||||
|
"sql1.ipynb",
|
||||||
|
} {
|
||||||
|
info, err := wf.Stat(ctx, fileName)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Equal(t, "foo.py", info.Name())
|
assert.Equal(t, fileName, info.Name())
|
||||||
assert.False(t, info.IsDir())
|
|
||||||
|
|
||||||
// Stat on a file
|
|
||||||
info, err = wf.Stat(ctx, "bar.py")
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, "bar.py", info.Name())
|
|
||||||
assert.False(t, info.IsDir())
|
|
||||||
|
|
||||||
// Stat on a Jupyter notebook
|
|
||||||
info, err = wf.Stat(ctx, "jupyter.ipynb")
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.Equal(t, "jupyter.ipynb", info.Name())
|
|
||||||
assert.False(t, info.IsDir())
|
assert.False(t, info.IsDir())
|
||||||
|
}
|
||||||
|
|
||||||
// Stat on a directory
|
// Stat on a directory
|
||||||
info, err = wf.Stat(ctx, "dir")
|
info, err := wf.Stat(ctx, "dir")
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.Equal(t, "dir", info.Name())
|
assert.Equal(t, "dir", info.Name())
|
||||||
assert.True(t, info.IsDir())
|
assert.True(t, info.IsDir())
|
||||||
|
|
||||||
// Stat on a non-existent file
|
for _, fileName := range []string{
|
||||||
_, err = wf.Stat(ctx, "non-existent.py")
|
// non-existent file
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
"non-existent.py",
|
||||||
|
// do not stat a file assuming it is a notebook and stripping the extension
|
||||||
// Ensure we do not stat a file as a notebook
|
"pretender.py",
|
||||||
_, err = wf.Stat(ctx, "pretender.py")
|
// do not stat a Scala notebook as a Python notebook
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
"scala-notebook.py",
|
||||||
|
// do not read a regular file assuming it is a Jupyter notebook and stripping the extension
|
||||||
// Ensure we do not stat a Scala notebook as a Python notebook
|
"pretender.ipynb",
|
||||||
_, err = wf.Stat(ctx, "scala-notebook.py")
|
} {
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
_, err := wf.Stat(ctx, fileName)
|
||||||
|
|
||||||
_, err = wf.Stat(ctx, "pretender.ipynb")
|
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) {
|
func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) {
|
||||||
|
@ -739,32 +726,115 @@ func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) {
|
||||||
func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) {
|
func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
// Case 1: Writing source notebooks.
|
||||||
|
for _, tc := range []struct {
|
||||||
|
language string
|
||||||
|
sourceName string
|
||||||
|
sourceContent string
|
||||||
|
jupyterName string
|
||||||
|
jupyterContent string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
language: "python",
|
||||||
|
sourceName: "foo.py",
|
||||||
|
sourceContent: "# Databricks notebook source\nprint('foo')",
|
||||||
|
jupyterName: "foo.ipynb",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
language: "r",
|
||||||
|
sourceName: "foo.r",
|
||||||
|
sourceContent: "# Databricks notebook source\nprint('foo')",
|
||||||
|
jupyterName: "foo.ipynb",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
language: "scala",
|
||||||
|
sourceName: "foo.scala",
|
||||||
|
sourceContent: "// Databricks notebook source\nprintln('foo')",
|
||||||
|
jupyterName: "foo.ipynb",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
language: "sql",
|
||||||
|
sourceName: "foo.sql",
|
||||||
|
sourceContent: "-- Databricks notebook source\nselect 'foo'",
|
||||||
|
jupyterName: "foo.ipynb",
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run("source_"+tc.language, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
wf, _ := setupWsfsExtensionsFiler(t)
|
wf, _ := setupWsfsExtensionsFiler(t)
|
||||||
|
|
||||||
// Case 1: Source Notebook
|
err := wf.Write(ctx, tc.sourceName, strings.NewReader(tc.sourceContent))
|
||||||
err := wf.Write(ctx, "foo.py", strings.NewReader("# Databricks notebook source\nprint('foo')"))
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
// The source notebook should exist but not the Jupyter notebook
|
// Assert on the content of the source notebook that's been written.
|
||||||
filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('foo')")
|
filerTest{t, wf}.assertContents(ctx, tc.sourceName, tc.sourceContent)
|
||||||
_, err = wf.Stat(ctx, "foo.ipynb")
|
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
||||||
_, err = wf.Read(ctx, "foo.ipynb")
|
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
||||||
err = wf.Delete(ctx, "foo.ipynb")
|
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
||||||
|
|
||||||
// Case 2: Jupyter Notebook
|
// Ensure that the source notebook is not read when the name contains
|
||||||
err = wf.Write(ctx, "bar.ipynb", strings.NewReader(jupyterNotebookContent1))
|
// the .ipynb extension.
|
||||||
|
_, err = wf.Stat(ctx, tc.jupyterName)
|
||||||
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
||||||
|
_, err = wf.Read(ctx, tc.jupyterName)
|
||||||
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
||||||
|
err = wf.Delete(ctx, tc.jupyterName)
|
||||||
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// Case 2: Writing Jupyter notebooks.
|
||||||
|
for _, tc := range []struct {
|
||||||
|
language string
|
||||||
|
sourceName string
|
||||||
|
jupyterName string
|
||||||
|
jupyterContent string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
language: "python",
|
||||||
|
sourceName: "foo.py",
|
||||||
|
jupyterName: "foo.ipynb",
|
||||||
|
jupyterContent: readFile(t, "testdata/notebooks/py1.ipynb"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
language: "r",
|
||||||
|
sourceName: "foo.r",
|
||||||
|
jupyterName: "foo.ipynb",
|
||||||
|
jupyterContent: readFile(t, "testdata/notebooks/r1.ipynb"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
language: "scala",
|
||||||
|
sourceName: "foo.scala",
|
||||||
|
jupyterName: "foo.ipynb",
|
||||||
|
jupyterContent: readFile(t, "testdata/notebooks/scala1.ipynb"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
language: "sql",
|
||||||
|
sourceName: "foo.sql",
|
||||||
|
jupyterName: "foo.ipynb",
|
||||||
|
jupyterContent: readFile(t, "testdata/notebooks/sql1.ipynb"),
|
||||||
|
},
|
||||||
|
} {
|
||||||
|
t.Run("jupyter_"+tc.language, func(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
ctx := context.Background()
|
||||||
|
wf, _ := setupWsfsExtensionsFiler(t)
|
||||||
|
|
||||||
|
err := wf.Write(ctx, tc.jupyterName, strings.NewReader(tc.jupyterContent))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
// The Jupyter notebook should exist but not the source notebook
|
// Assert that the written notebook is jupyter and has the correct
|
||||||
filerTest{t, wf}.assertContentsJupyter(ctx, "bar.ipynb")
|
// language_info metadata set.
|
||||||
_, err = wf.Stat(ctx, "bar.py")
|
filerTest{t, wf}.assertContentsJupyter(ctx, tc.jupyterName, tc.language)
|
||||||
|
|
||||||
|
// Ensure that the Jupyter notebook is not read when the name does not
|
||||||
|
// contain the .ipynb extension.
|
||||||
|
_, err = wf.Stat(ctx, tc.sourceName)
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
||||||
_, err = wf.Read(ctx, "bar.py")
|
_, err = wf.Read(ctx, tc.sourceName)
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
||||||
err = wf.Delete(ctx, "bar.py")
|
err = wf.Delete(ctx, tc.sourceName)
|
||||||
assert.ErrorIs(t, err, fs.ErrNotExist)
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -352,6 +352,13 @@ func RequireErrorRun(t *testing.T, args ...string) (bytes.Buffer, bytes.Buffer,
|
||||||
return stdout, stderr, err
|
return stdout, stderr, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func readFile(t *testing.T, name string) string {
|
||||||
|
b, err := os.ReadFile(name)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
return string(b)
|
||||||
|
}
|
||||||
|
|
||||||
func writeFile(t *testing.T, name string, body string) string {
|
func writeFile(t *testing.T, name string, body string) string {
|
||||||
f, err := os.Create(filepath.Join(t.TempDir(), name))
|
f, err := os.Create(filepath.Join(t.TempDir(), name))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
@ -562,12 +569,10 @@ func setupLocalFiler(t *testing.T) (filer.Filer, string) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func setupWsfsFiler(t *testing.T) (filer.Filer, string) {
|
func setupWsfsFiler(t *testing.T) (filer.Filer, string) {
|
||||||
t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
|
ctx, wt := acc.WorkspaceTest(t)
|
||||||
|
|
||||||
ctx := context.Background()
|
tmpdir := TemporaryWorkspaceDir(t, wt.W)
|
||||||
w := databricks.Must(databricks.NewWorkspaceClient())
|
f, err := filer.NewWorkspaceFilesClient(wt.W, tmpdir)
|
||||||
tmpdir := TemporaryWorkspaceDir(t, w)
|
|
||||||
f, err := filer.NewWorkspaceFilesClient(w, tmpdir)
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
// Check if we can use this API here, skip test if we cannot.
|
// Check if we can use this API here, skip test if we cannot.
|
||||||
|
@ -581,11 +586,10 @@ func setupWsfsFiler(t *testing.T) (filer.Filer, string) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func setupWsfsExtensionsFiler(t *testing.T) (filer.Filer, string) {
|
func setupWsfsExtensionsFiler(t *testing.T) (filer.Filer, string) {
|
||||||
t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
|
_, wt := acc.WorkspaceTest(t)
|
||||||
|
|
||||||
w := databricks.Must(databricks.NewWorkspaceClient())
|
tmpdir := TemporaryWorkspaceDir(t, wt.W)
|
||||||
tmpdir := TemporaryWorkspaceDir(t, w)
|
f, err := filer.NewWorkspaceFilesExtensionsClient(wt.W, tmpdir)
|
||||||
f, err := filer.NewWorkspaceFilesExtensionsClient(w, tmpdir)
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
return f, tmpdir
|
return f, tmpdir
|
||||||
|
|
|
@ -0,0 +1,27 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(1)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.8.13"
|
||||||
|
},
|
||||||
|
"orig_nbformat": 4
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
|
@ -0,0 +1,27 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(2)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "python",
|
||||||
|
"version": "3.8.13"
|
||||||
|
},
|
||||||
|
"orig_nbformat": 4
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
|
@ -0,0 +1,25 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(1)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "R",
|
||||||
|
"language": "R",
|
||||||
|
"name": "ir"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "R"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
|
@ -0,0 +1,29 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {
|
||||||
|
"vscode": {
|
||||||
|
"languageId": "r"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"print(2)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "R",
|
||||||
|
"language": "R",
|
||||||
|
"name": "ir"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"name": "R"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
|
@ -0,0 +1,38 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"1\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"println(1)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Scala",
|
||||||
|
"language": "scala",
|
||||||
|
"name": "scala"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": "text/x-scala",
|
||||||
|
"file_extension": ".sc",
|
||||||
|
"mimetype": "text/x-scala",
|
||||||
|
"name": "scala",
|
||||||
|
"nbconvert_exporter": "script",
|
||||||
|
"version": "2.13.14"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
|
@ -0,0 +1,38 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"1\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"println(2)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Scala",
|
||||||
|
"language": "scala",
|
||||||
|
"name": "scala"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": "text/x-scala",
|
||||||
|
"file_extension": ".sc",
|
||||||
|
"mimetype": "text/x-scala",
|
||||||
|
"name": "scala",
|
||||||
|
"nbconvert_exporter": "script",
|
||||||
|
"version": "2.13.14"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"select 1"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"language_info": {
|
||||||
|
"name": "sql"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
|
@ -0,0 +1,20 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"select 2"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"language_info": {
|
||||||
|
"name": "sql"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
|
@ -7,6 +7,7 @@ import (
|
||||||
"io"
|
"io"
|
||||||
"io/fs"
|
"io/fs"
|
||||||
"path"
|
"path"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/databricks/cli/libs/log"
|
"github.com/databricks/cli/libs/log"
|
||||||
|
@ -23,14 +24,6 @@ type workspaceFilesExtensionsClient struct {
|
||||||
readonly bool
|
readonly bool
|
||||||
}
|
}
|
||||||
|
|
||||||
var extensionsToLanguages = map[string]workspace.Language{
|
|
||||||
".py": workspace.LanguagePython,
|
|
||||||
".r": workspace.LanguageR,
|
|
||||||
".scala": workspace.LanguageScala,
|
|
||||||
".sql": workspace.LanguageSql,
|
|
||||||
".ipynb": workspace.LanguagePython,
|
|
||||||
}
|
|
||||||
|
|
||||||
type workspaceFileStatus struct {
|
type workspaceFileStatus struct {
|
||||||
wsfsFileInfo
|
wsfsFileInfo
|
||||||
|
|
||||||
|
@ -54,7 +47,12 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex
|
||||||
nameWithoutExt := strings.TrimSuffix(name, ext)
|
nameWithoutExt := strings.TrimSuffix(name, ext)
|
||||||
|
|
||||||
// File name does not have an extension associated with Databricks notebooks, return early.
|
// File name does not have an extension associated with Databricks notebooks, return early.
|
||||||
if _, ok := extensionsToLanguages[ext]; !ok {
|
if !slices.Contains([]string{
|
||||||
|
notebook.ExtensionPython,
|
||||||
|
notebook.ExtensionR,
|
||||||
|
notebook.ExtensionScala,
|
||||||
|
notebook.ExtensionSql,
|
||||||
|
notebook.ExtensionJupyter}, ext) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -75,22 +73,23 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Not the correct language. Return early.
|
// Not the correct language. Return early. Note: All languages are supported
|
||||||
if stat.Language != extensionsToLanguages[ext] {
|
// for Jupyter notebooks.
|
||||||
log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), extensionsToLanguages[ext], stat.Language)
|
if ext != notebook.ExtensionJupyter && stat.Language != notebook.ExtensionToLanguage[ext] {
|
||||||
|
log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), notebook.ExtensionToLanguage[ext], stat.Language)
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// When the extension is .py we expect the export format to be source.
|
// For non-jupyter notebooks the export format should be source.
|
||||||
// If it's not, return early.
|
// If it's not, return early.
|
||||||
if ext == ".py" && stat.ReposExportFormat != workspace.ExportFormatSource {
|
if ext != notebook.ExtensionJupyter && stat.ReposExportFormat != workspace.ExportFormatSource {
|
||||||
log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a source notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat)
|
log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a source notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat)
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// When the extension is .ipynb we expect the export format to be Jupyter.
|
// When the extension is .ipynb we expect the export format to be Jupyter.
|
||||||
// If it's not, return early.
|
// If it's not, return early.
|
||||||
if ext == ".ipynb" && stat.ReposExportFormat != workspace.ExportFormatJupyter {
|
if ext == notebook.ExtensionJupyter && stat.ReposExportFormat != workspace.ExportFormatJupyter {
|
||||||
log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a Jupyter notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat)
|
log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a Jupyter notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat)
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
@ -120,8 +119,8 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx con
|
||||||
ext := notebook.GetExtensionByLanguage(&stat.ObjectInfo)
|
ext := notebook.GetExtensionByLanguage(&stat.ObjectInfo)
|
||||||
|
|
||||||
// If the notebook was exported as a Jupyter notebook, the extension should be .ipynb.
|
// If the notebook was exported as a Jupyter notebook, the extension should be .ipynb.
|
||||||
if stat.Language == workspace.LanguagePython && stat.ReposExportFormat == workspace.ExportFormatJupyter {
|
if stat.ReposExportFormat == workspace.ExportFormatJupyter {
|
||||||
ext = ".ipynb"
|
ext = notebook.ExtensionJupyter
|
||||||
}
|
}
|
||||||
|
|
||||||
// Modify the stat object path to include the extension. This stat object will be used
|
// Modify the stat object path to include the extension. This stat object will be used
|
||||||
|
|
|
@ -37,7 +37,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) {
|
||||||
expectedError string
|
expectedError string
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "python source notebook and file",
|
name: "python source notebook and file with source extension",
|
||||||
language: workspace.LanguagePython,
|
language: workspace.LanguagePython,
|
||||||
notebookExportFormat: workspace.ExportFormatSource,
|
notebookExportFormat: workspace.ExportFormatSource,
|
||||||
notebookPath: "/dir/foo",
|
notebookPath: "/dir/foo",
|
||||||
|
@ -45,7 +45,31 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) {
|
||||||
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue",
|
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "python jupyter notebook and file",
|
name: "scala source notebook and file with source extension",
|
||||||
|
language: workspace.LanguageScala,
|
||||||
|
notebookExportFormat: workspace.ExportFormatSource,
|
||||||
|
notebookPath: "/dir/foo",
|
||||||
|
filePath: "/dir/foo.scala",
|
||||||
|
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.scala resolve to the same name /foo.scala. Changing the name of one of these objects will resolve this issue",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "r source notebook and file with source extension",
|
||||||
|
language: workspace.LanguageR,
|
||||||
|
notebookExportFormat: workspace.ExportFormatSource,
|
||||||
|
notebookPath: "/dir/foo",
|
||||||
|
filePath: "/dir/foo.r",
|
||||||
|
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.r resolve to the same name /foo.r. Changing the name of one of these objects will resolve this issue",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "sql source notebook and file with source extension",
|
||||||
|
language: workspace.LanguageSql,
|
||||||
|
notebookExportFormat: workspace.ExportFormatSource,
|
||||||
|
notebookPath: "/dir/foo",
|
||||||
|
filePath: "/dir/foo.sql",
|
||||||
|
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.sql resolve to the same name /foo.sql. Changing the name of one of these objects will resolve this issue",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "python jupyter notebook and file with source extension",
|
||||||
language: workspace.LanguagePython,
|
language: workspace.LanguagePython,
|
||||||
notebookExportFormat: workspace.ExportFormatJupyter,
|
notebookExportFormat: workspace.ExportFormatJupyter,
|
||||||
notebookPath: "/dir/foo",
|
notebookPath: "/dir/foo",
|
||||||
|
@ -54,37 +78,64 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) {
|
||||||
expectedError: "",
|
expectedError: "",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "scala source notebook and file",
|
name: "scala jupyter notebook and file with source extension",
|
||||||
language: workspace.LanguageScala,
|
language: workspace.LanguageScala,
|
||||||
notebookExportFormat: workspace.ExportFormatSource,
|
notebookExportFormat: workspace.ExportFormatJupyter,
|
||||||
notebookPath: "/dir/foo",
|
notebookPath: "/dir/foo",
|
||||||
filePath: "/dir/foo.scala",
|
filePath: "/dir/foo.scala",
|
||||||
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.scala resolve to the same name /foo.scala. Changing the name of one of these objects will resolve this issue",
|
// Jupyter notebooks would correspond to foo.ipynb so an error is not expected.
|
||||||
|
expectedError: "",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "r source notebook and file",
|
name: "sql jupyter notebook and file with source extension",
|
||||||
language: workspace.LanguageR,
|
|
||||||
notebookExportFormat: workspace.ExportFormatSource,
|
|
||||||
notebookPath: "/dir/foo",
|
|
||||||
filePath: "/dir/foo.r",
|
|
||||||
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.r resolve to the same name /foo.r. Changing the name of one of these objects will resolve this issue",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "sql source notebook and file",
|
|
||||||
language: workspace.LanguageSql,
|
language: workspace.LanguageSql,
|
||||||
notebookExportFormat: workspace.ExportFormatSource,
|
notebookExportFormat: workspace.ExportFormatJupyter,
|
||||||
notebookPath: "/dir/foo",
|
notebookPath: "/dir/foo",
|
||||||
filePath: "/dir/foo.sql",
|
filePath: "/dir/foo.sql",
|
||||||
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.sql resolve to the same name /foo.sql. Changing the name of one of these objects will resolve this issue",
|
// Jupyter notebooks would correspond to foo.ipynb so an error is not expected.
|
||||||
|
expectedError: "",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "python jupyter notebook and file",
|
name: "r jupyter notebook and file with source extension",
|
||||||
|
language: workspace.LanguageR,
|
||||||
|
notebookExportFormat: workspace.ExportFormatJupyter,
|
||||||
|
notebookPath: "/dir/foo",
|
||||||
|
filePath: "/dir/foo.sql",
|
||||||
|
// Jupyter notebooks would correspond to foo.ipynb so an error is not expected.
|
||||||
|
expectedError: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "python jupyter notebook and file with .ipynb extension",
|
||||||
language: workspace.LanguagePython,
|
language: workspace.LanguagePython,
|
||||||
notebookExportFormat: workspace.ExportFormatJupyter,
|
notebookExportFormat: workspace.ExportFormatJupyter,
|
||||||
notebookPath: "/dir/foo",
|
notebookPath: "/dir/foo",
|
||||||
filePath: "/dir/foo.ipynb",
|
filePath: "/dir/foo.ipynb",
|
||||||
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
|
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "scala jupyter notebook and file with .ipynb extension",
|
||||||
|
language: workspace.LanguageScala,
|
||||||
|
notebookExportFormat: workspace.ExportFormatJupyter,
|
||||||
|
notebookPath: "/dir/foo",
|
||||||
|
filePath: "/dir/foo.ipynb",
|
||||||
|
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "r jupyter notebook and file with .ipynb extension",
|
||||||
|
language: workspace.LanguageR,
|
||||||
|
notebookExportFormat: workspace.ExportFormatJupyter,
|
||||||
|
notebookPath: "/dir/foo",
|
||||||
|
filePath: "/dir/foo.ipynb",
|
||||||
|
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "sql jupyter notebook and file with .ipynb extension",
|
||||||
|
language: workspace.LanguageSql,
|
||||||
|
notebookExportFormat: workspace.ExportFormatJupyter,
|
||||||
|
notebookPath: "/dir/foo",
|
||||||
|
filePath: "/dir/foo.ipynb",
|
||||||
|
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
|
||||||
|
},
|
||||||
} {
|
} {
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
t.Run(tc.name, func(t *testing.T) {
|
||||||
mockedWorkspaceClient := mocks.NewMockWorkspaceClient(t)
|
mockedWorkspaceClient := mocks.NewMockWorkspaceClient(t)
|
||||||
|
|
|
@ -107,19 +107,19 @@ func DetectWithFS(fsys fs.FS, name string) (notebook bool, language workspace.La
|
||||||
// Determine which header to expect based on filename extension.
|
// Determine which header to expect based on filename extension.
|
||||||
ext := strings.ToLower(filepath.Ext(name))
|
ext := strings.ToLower(filepath.Ext(name))
|
||||||
switch ext {
|
switch ext {
|
||||||
case ".py":
|
case ExtensionPython:
|
||||||
header = `# Databricks notebook source`
|
header = `# Databricks notebook source`
|
||||||
language = workspace.LanguagePython
|
language = workspace.LanguagePython
|
||||||
case ".r":
|
case ExtensionR:
|
||||||
header = `# Databricks notebook source`
|
header = `# Databricks notebook source`
|
||||||
language = workspace.LanguageR
|
language = workspace.LanguageR
|
||||||
case ".scala":
|
case ExtensionScala:
|
||||||
header = "// Databricks notebook source"
|
header = "// Databricks notebook source"
|
||||||
language = workspace.LanguageScala
|
language = workspace.LanguageScala
|
||||||
case ".sql":
|
case ExtensionSql:
|
||||||
header = "-- Databricks notebook source"
|
header = "-- Databricks notebook source"
|
||||||
language = workspace.LanguageSql
|
language = workspace.LanguageSql
|
||||||
case ".ipynb":
|
case ExtensionJupyter:
|
||||||
return DetectJupyterWithFS(fsys, name)
|
return DetectJupyterWithFS(fsys, name)
|
||||||
default:
|
default:
|
||||||
return false, "", nil
|
return false, "", nil
|
||||||
|
|
|
@ -2,22 +2,40 @@ package notebook
|
||||||
|
|
||||||
import "github.com/databricks/databricks-sdk-go/service/workspace"
|
import "github.com/databricks/databricks-sdk-go/service/workspace"
|
||||||
|
|
||||||
|
const (
|
||||||
|
ExtensionNone string = ""
|
||||||
|
ExtensionPython string = ".py"
|
||||||
|
ExtensionR string = ".r"
|
||||||
|
ExtensionScala string = ".scala"
|
||||||
|
ExtensionSql string = ".sql"
|
||||||
|
ExtensionJupyter string = ".ipynb"
|
||||||
|
)
|
||||||
|
|
||||||
|
var ExtensionToLanguage = map[string]workspace.Language{
|
||||||
|
ExtensionPython: workspace.LanguagePython,
|
||||||
|
ExtensionR: workspace.LanguageR,
|
||||||
|
ExtensionScala: workspace.LanguageScala,
|
||||||
|
ExtensionSql: workspace.LanguageSql,
|
||||||
|
|
||||||
|
// The platform supports all languages (Python, R, Scala, and SQL) for Jupyter notebooks.
|
||||||
|
}
|
||||||
|
|
||||||
func GetExtensionByLanguage(objectInfo *workspace.ObjectInfo) string {
|
func GetExtensionByLanguage(objectInfo *workspace.ObjectInfo) string {
|
||||||
if objectInfo.ObjectType != workspace.ObjectTypeNotebook {
|
if objectInfo.ObjectType != workspace.ObjectTypeNotebook {
|
||||||
return ""
|
return ExtensionNone
|
||||||
}
|
}
|
||||||
|
|
||||||
switch objectInfo.Language {
|
switch objectInfo.Language {
|
||||||
case workspace.LanguagePython:
|
case workspace.LanguagePython:
|
||||||
return ".py"
|
return ExtensionPython
|
||||||
case workspace.LanguageR:
|
case workspace.LanguageR:
|
||||||
return ".r"
|
return ExtensionR
|
||||||
case workspace.LanguageScala:
|
case workspace.LanguageScala:
|
||||||
return ".scala"
|
return ExtensionScala
|
||||||
case workspace.LanguageSql:
|
case workspace.LanguageSql:
|
||||||
return ".sql"
|
return ExtensionSql
|
||||||
default:
|
default:
|
||||||
// Do not add any extension to the file name
|
// Do not add any extension to the file name
|
||||||
return ""
|
return ExtensionNone
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue