diff --git a/internal/filer_test.go b/internal/filer_test.go index bc4c9480..20207d34 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -39,7 +39,7 @@ func (f filerTest) assertContents(ctx context.Context, name string, contents str assert.Equal(f, contents, body.String()) } -func (f filerTest) assertContentsJupyter(ctx context.Context, name string) { +func (f filerTest) assertContentsJupyter(ctx context.Context, name string, language string) { reader, err := f.Read(ctx, name) if !assert.NoError(f, err) { return @@ -62,6 +62,7 @@ func (f filerTest) assertContentsJupyter(ctx context.Context, name string) { // Since a roundtrip to the workspace changes a Jupyter notebook's payload, // the best we can do is assert that the nbformat is correct. assert.EqualValues(f, 4, actual["nbformat"]) + assert.Equal(f, language, actual["metadata"].(map[string]any)["language_info"].(map[string]any)["name"]) } func (f filerTest) assertNotExists(ctx context.Context, name string) { @@ -360,146 +361,114 @@ func TestAccFilerReadDir(t *testing.T) { } } -var jupyterNotebookContent1 = ` -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Jupyter Notebook Version 1\")" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 - } -` - -var jupyterNotebookContent2 = ` -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Jupyter Notebook Version 2\")" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 - } -` - -func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { +func TestAccFilerWorkspaceNotebook(t *testing.T) { t.Parallel() - f, _ := setupWsfsFiler(t) ctx := context.Background() var err error - // Upload the notebooks - err = f.Write(ctx, "pyNb.py", 
strings.NewReader("# Databricks notebook source\nprint('first upload'))")) - require.NoError(t, err) - err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) - require.NoError(t, err) - err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"first upload\"")) - require.NoError(t, err) - err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))")) - require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1)) - require.NoError(t, err) + tcases := []struct { + name string + nameWithoutExt string + content1 string + expected1 string + content2 string + expected2 string + }{ + { + name: "pyNb.py", + nameWithoutExt: "pyNb", + content1: "# Databricks notebook source\nprint('first upload')", + expected1: "# Databricks notebook source\nprint('first upload')", + content2: "# Databricks notebook source\nprint('second upload')", + expected2: "# Databricks notebook source\nprint('second upload')", + }, + { + name: "rNb.r", + nameWithoutExt: "rNb", + content1: "# Databricks notebook source\nprint('first upload')", + expected1: "# Databricks notebook source\nprint('first upload')", + content2: "# Databricks notebook source\nprint('second upload')", + expected2: "# Databricks notebook source\nprint('second upload')", + }, + { + name: "sqlNb.sql", + nameWithoutExt: "sqlNb", + content1: "-- Databricks notebook source\n SELECT \"first upload\"", + expected1: "-- Databricks notebook source\n SELECT \"first upload\"", + content2: "-- Databricks notebook source\n SELECT \"second upload\"", + expected2: "-- Databricks notebook source\n SELECT \"second upload\"", + }, + { + name: "scalaNb.scala", + nameWithoutExt: "scalaNb", + content1: "// Databricks notebook source\n println(\"first upload\")", + expected1: "// Databricks notebook source\n println(\"first upload\")", + content2: "// Databricks notebook 
source\n println(\"second upload\")", + expected2: "// Databricks notebook source\n println(\"second upload\")", + }, + { + name: "pythonJupyterNb.ipynb", + nameWithoutExt: "pythonJupyterNb", + content1: readFile(t, "testdata/notebooks/py1.ipynb"), + expected1: "# Databricks notebook source\nprint(1)", + content2: readFile(t, "testdata/notebooks/py2.ipynb"), + expected2: "# Databricks notebook source\nprint(2)", + }, + { + name: "rJupyterNb.ipynb", + nameWithoutExt: "rJupyterNb", + content1: readFile(t, "testdata/notebooks/r1.ipynb"), + expected1: "# Databricks notebook source\nprint(1)", + content2: readFile(t, "testdata/notebooks/r2.ipynb"), + expected2: "# Databricks notebook source\nprint(2)", + }, + { + name: "scalaJupyterNb.ipynb", + nameWithoutExt: "scalaJupyterNb", + content1: readFile(t, "testdata/notebooks/scala1.ipynb"), + expected1: "// Databricks notebook source\nprintln(1)", + content2: readFile(t, "testdata/notebooks/scala2.ipynb"), + expected2: "// Databricks notebook source\nprintln(2)", + }, + { + name: "sqlJupyterNotebook.ipynb", + nameWithoutExt: "sqlJupyterNotebook", + content1: readFile(t, "testdata/notebooks/sql1.ipynb"), + expected1: "-- Databricks notebook source\nselect 1", + content2: readFile(t, "testdata/notebooks/sql2.ipynb"), + expected2: "-- Databricks notebook source\nselect 2", + }, + } - // Assert contents after initial upload - filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('first upload'))") - filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))") - filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"") - filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))") - filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")") + for _, tc := range tcases { + f, _ := setupWsfsFiler(t) + 
t.Run(tc.name, func(t *testing.T) { + t.Parallel() - // Assert uploading a second time fails due to overwrite mode missing - err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))")) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/pyNb$`), err.Error()) + // Upload the notebook + err = f.Write(ctx, tc.name, strings.NewReader(tc.content1)) + require.NoError(t, err) - err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('second upload'))")) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/rNb$`), err.Error()) + // Assert contents after initial upload. Note that we expect the content + // for jupyter notebooks to be of type source because the workspace files + // client always uses the source format to read notebooks from the workspace. + filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected1) - err = f.Write(ctx, "sqlNb.sql", strings.NewReader("# Databricks notebook source\n SELECT \"second upload\")")) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/sqlNb$`), err.Error()) + // Assert uploading a second time fails due to overwrite mode missing + err = f.Write(ctx, tc.name, strings.NewReader(tc.content2)) + assert.ErrorIs(t, err, fs.ErrExist) + assert.Regexp(t, regexp.MustCompile(`file already exists: .*/`+tc.nameWithoutExt+`$`), err.Error()) - err = f.Write(ctx, "scalaNb.scala", strings.NewReader("# Databricks notebook source\n println(\"second upload\"))")) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/scalaNb$`), err.Error()) + // Try uploading the notebook again with overwrite flag. This time it should succeed. 
+ err = f.Write(ctx, tc.name, strings.NewReader(tc.content2), filer.OverwriteIfExists) + require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2)) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/jupyterNb$`), err.Error()) -} + // Assert contents after second upload + filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected2) + }) + } -func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { - t.Parallel() - - f, _ := setupWsfsFiler(t) - ctx := context.Background() - var err error - - // Upload notebooks - err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) - require.NoError(t, err) - err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) - require.NoError(t, err) - err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"first upload\"")) - require.NoError(t, err) - err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))")) - require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1)) - require.NoError(t, err) - - // Assert contents after initial upload - filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('first upload'))") - filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))") - filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"") - filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))") - filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")") - - // Upload notebooks a second time, overwriting the initial uplaods - err = f.Write(ctx, "pyNb.py", strings.NewReader("# 
Databricks notebook source\nprint('second upload'))"), filer.OverwriteIfExists) - require.NoError(t, err) - err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('second upload'))"), filer.OverwriteIfExists) - require.NoError(t, err) - err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"second upload\""), filer.OverwriteIfExists) - require.NoError(t, err) - err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"second upload\"))"), filer.OverwriteIfExists) - require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2), filer.OverwriteIfExists) - require.NoError(t, err) - - // Assert contents have been overwritten - filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('second upload'))") - filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('second upload'))") - filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"second upload\"") - filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"second upload\"))") - filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 2\")") } func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { @@ -515,11 +484,13 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { {"foo.r", "print('foo')"}, {"foo.scala", "println('foo')"}, {"foo.sql", "SELECT 'foo'"}, - {"jupyterNb.ipynb", jupyterNotebookContent1}, - {"jupyterNb2.ipynb", jupyterNotebookContent2}, + {"py1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")}, {"pyNb.py", "# Databricks notebook source\nprint('first upload'))"}, + {"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")}, {"rNb.r", "# Databricks notebook source\nprint('first upload'))"}, + {"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")}, 
{"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"}, + {"sql1.ipynb", readFile(t, "testdata/notebooks/sql1.ipynb")}, {"sqlNb.sql", "-- Databricks notebook source\n SELECT \"first upload\""}, } @@ -554,11 +525,13 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { "foo.r", "foo.scala", "foo.sql", - "jupyterNb.ipynb", - "jupyterNb2.ipynb", + "py1.ipynb", "pyNb.py", + "r1.ipynb", "rNb.r", + "scala1.ipynb", "scalaNb.scala", + "sql1.ipynb", "sqlNb.sql", }, names) @@ -582,7 +555,10 @@ func setupFilerWithExtensionsTest(t *testing.T) filer.Filer { }{ {"foo.py", "# Databricks notebook source\nprint('first upload'))"}, {"bar.py", "print('foo')"}, - {"jupyter.ipynb", jupyterNotebookContent1}, + {"p1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")}, + {"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")}, + {"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")}, + {"sql1.ipynb", readFile(t, "testdata/notebooks/sql1.ipynb")}, {"pretender", "not a notebook"}, {"dir/file.txt", "file content"}, {"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"}, @@ -608,11 +584,15 @@ func TestAccFilerWorkspaceFilesExtensionsRead(t *testing.T) { // Read contents of test fixtures as a sanity check. 
filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('first upload'))") filerTest{t, wf}.assertContents(ctx, "bar.py", "print('foo')") - filerTest{t, wf}.assertContentsJupyter(ctx, "jupyter.ipynb") filerTest{t, wf}.assertContents(ctx, "dir/file.txt", "file content") filerTest{t, wf}.assertContents(ctx, "scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')") filerTest{t, wf}.assertContents(ctx, "pretender", "not a notebook") + filerTest{t, wf}.assertContentsJupyter(ctx, "p1.ipynb", "python") + filerTest{t, wf}.assertContentsJupyter(ctx, "r1.ipynb", "r") + filerTest{t, wf}.assertContentsJupyter(ctx, "scala1.ipynb", "scala") + filerTest{t, wf}.assertContentsJupyter(ctx, "sql1.ipynb", "sql") + // Read non-existent file _, err := wf.Read(ctx, "non-existent.py") assert.ErrorIs(t, err, fs.ErrNotExist) @@ -638,35 +618,41 @@ func TestAccFilerWorkspaceFilesExtensionsDelete(t *testing.T) { ctx := context.Background() wf := setupFilerWithExtensionsTest(t) - // Delete notebook - err := wf.Delete(ctx, "foo.py") - require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "foo.py") + for _, fileName := range []string{ + // notebook + "foo.py", + // file + "bar.py", + // python jupyter notebook + "p1.ipynb", + // R jupyter notebook + "r1.ipynb", + // Scala jupyter notebook + "scala1.ipynb", + // SQL jupyter notebook + "sql1.ipynb", + } { + err := wf.Delete(ctx, fileName) + require.NoError(t, err) + filerTest{t, wf}.assertNotExists(ctx, fileName) + } - // Delete file - err = wf.Delete(ctx, "bar.py") - require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "bar.py") - - // Delete jupyter notebook - err = wf.Delete(ctx, "jupyter.ipynb") - require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "jupyter.ipynb") - - // Delete non-existent file - err = wf.Delete(ctx, "non-existent.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - // Ensure we do not delete a file as a notebook - err = wf.Delete(ctx, 
"pretender.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - // Ensure we do not delete a Scala notebook as a Python notebook - _, err = wf.Read(ctx, "scala-notebook.py") - assert.ErrorIs(t, err, fs.ErrNotExist) + for _, fileName := range []string{ + // do not delete non-existent file + "non-existent.py", + // do not delete a file assuming it is a notebook and stripping the extension + "pretender.py", + // do not delete a Scala notebook as a Python notebook + "scala-notebook.py", + // do not delete a file assuming it is a Jupyter notebook and stripping the extension + "pretender.ipynb", + } { + err := wf.Delete(ctx, fileName) + assert.ErrorIs(t, err, fs.ErrNotExist) + } // Delete directory - err = wf.Delete(ctx, "dir") + err := wf.Delete(ctx, "dir") assert.ErrorIs(t, err, fs.ErrInvalid) // Delete directory recursively @@ -681,44 +667,45 @@ func TestAccFilerWorkspaceFilesExtensionsStat(t *testing.T) { ctx := context.Background() wf := setupFilerWithExtensionsTest(t) - // Stat on a notebook - info, err := wf.Stat(ctx, "foo.py") - require.NoError(t, err) - assert.Equal(t, "foo.py", info.Name()) - assert.False(t, info.IsDir()) - - // Stat on a file - info, err = wf.Stat(ctx, "bar.py") - require.NoError(t, err) - assert.Equal(t, "bar.py", info.Name()) - assert.False(t, info.IsDir()) - - // Stat on a Jupyter notebook - info, err = wf.Stat(ctx, "jupyter.ipynb") - require.NoError(t, err) - assert.Equal(t, "jupyter.ipynb", info.Name()) - assert.False(t, info.IsDir()) + for _, fileName := range []string{ + // notebook + "foo.py", + // file + "bar.py", + // python jupyter notebook + "p1.ipynb", + // R jupyter notebook + "r1.ipynb", + // Scala jupyter notebook + "scala1.ipynb", + // SQL jupyter notebook + "sql1.ipynb", + } { + info, err := wf.Stat(ctx, fileName) + require.NoError(t, err) + assert.Equal(t, fileName, info.Name()) + assert.False(t, info.IsDir()) + } // Stat on a directory - info, err = wf.Stat(ctx, "dir") + info, err := wf.Stat(ctx, "dir") require.NoError(t, err) 
assert.Equal(t, "dir", info.Name()) assert.True(t, info.IsDir()) - // Stat on a non-existent file - _, err = wf.Stat(ctx, "non-existent.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - // Ensure we do not stat a file as a notebook - _, err = wf.Stat(ctx, "pretender.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - // Ensure we do not stat a Scala notebook as a Python notebook - _, err = wf.Stat(ctx, "scala-notebook.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - _, err = wf.Stat(ctx, "pretender.ipynb") - assert.ErrorIs(t, err, fs.ErrNotExist) + for _, fileName := range []string{ + // non-existent file + "non-existent.py", + // do not stat a file assuming it is a notebook and stripping the extension + "pretender.py", + // do not stat a Scala notebook as a Python notebook + "scala-notebook.py", + // do not read a regular file assuming it is a Jupyter notebook and stripping the extension + "pretender.ipynb", + } { + _, err := wf.Stat(ctx, fileName) + assert.ErrorIs(t, err, fs.ErrNotExist) + } } func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) { @@ -739,32 +726,115 @@ func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) { func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { t.Parallel() - ctx := context.Background() - wf, _ := setupWsfsExtensionsFiler(t) + // Case 1: Writing source notebooks. 
+ for _, tc := range []struct { + language string + sourceName string + sourceContent string + jupyterName string + jupyterContent string + }{ + { + language: "python", + sourceName: "foo.py", + sourceContent: "# Databricks notebook source\nprint('foo')", + jupyterName: "foo.ipynb", + }, + { + language: "r", + sourceName: "foo.r", + sourceContent: "# Databricks notebook source\nprint('foo')", + jupyterName: "foo.ipynb", + }, + { + language: "scala", + sourceName: "foo.scala", + sourceContent: "// Databricks notebook source\nprintln('foo')", + jupyterName: "foo.ipynb", + }, + { + language: "sql", + sourceName: "foo.sql", + sourceContent: "-- Databricks notebook source\nselect 'foo'", + jupyterName: "foo.ipynb", + }, + } { + t.Run("source_"+tc.language, func(t *testing.T) { + t.Parallel() - // Case 1: Source Notebook - err := wf.Write(ctx, "foo.py", strings.NewReader("# Databricks notebook source\nprint('foo')")) - require.NoError(t, err) + ctx := context.Background() + wf, _ := setupWsfsExtensionsFiler(t) - // The source notebook should exist but not the Jupyter notebook - filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('foo')") - _, err = wf.Stat(ctx, "foo.ipynb") - assert.ErrorIs(t, err, fs.ErrNotExist) - _, err = wf.Read(ctx, "foo.ipynb") - assert.ErrorIs(t, err, fs.ErrNotExist) - err = wf.Delete(ctx, "foo.ipynb") - assert.ErrorIs(t, err, fs.ErrNotExist) + err := wf.Write(ctx, tc.sourceName, strings.NewReader(tc.sourceContent)) + require.NoError(t, err) - // Case 2: Jupyter Notebook - err = wf.Write(ctx, "bar.ipynb", strings.NewReader(jupyterNotebookContent1)) - require.NoError(t, err) + // Assert on the content of the source notebook that's been written. 
+ filerTest{t, wf}.assertContents(ctx, tc.sourceName, tc.sourceContent) - // The Jupyter notebook should exist but not the source notebook - filerTest{t, wf}.assertContentsJupyter(ctx, "bar.ipynb") - _, err = wf.Stat(ctx, "bar.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - _, err = wf.Read(ctx, "bar.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - err = wf.Delete(ctx, "bar.py") - assert.ErrorIs(t, err, fs.ErrNotExist) + // Ensure that the source notebook is not read when the name contains + // the .ipynb extension. + _, err = wf.Stat(ctx, tc.jupyterName) + assert.ErrorIs(t, err, fs.ErrNotExist) + _, err = wf.Read(ctx, tc.jupyterName) + assert.ErrorIs(t, err, fs.ErrNotExist) + err = wf.Delete(ctx, tc.jupyterName) + assert.ErrorIs(t, err, fs.ErrNotExist) + }) + } + + // Case 2: Writing Jupyter notebooks. + for _, tc := range []struct { + language string + sourceName string + jupyterName string + jupyterContent string + }{ + { + language: "python", + sourceName: "foo.py", + jupyterName: "foo.ipynb", + jupyterContent: readFile(t, "testdata/notebooks/py1.ipynb"), + }, + { + language: "r", + sourceName: "foo.r", + jupyterName: "foo.ipynb", + jupyterContent: readFile(t, "testdata/notebooks/r1.ipynb"), + }, + { + language: "scala", + sourceName: "foo.scala", + jupyterName: "foo.ipynb", + jupyterContent: readFile(t, "testdata/notebooks/scala1.ipynb"), + }, + { + language: "sql", + sourceName: "foo.sql", + jupyterName: "foo.ipynb", + jupyterContent: readFile(t, "testdata/notebooks/sql1.ipynb"), + }, + } { + t.Run("jupyter_"+tc.language, func(t *testing.T) { + t.Parallel() + + ctx := context.Background() + wf, _ := setupWsfsExtensionsFiler(t) + + err := wf.Write(ctx, tc.jupyterName, strings.NewReader(tc.jupyterContent)) + require.NoError(t, err) + + // Assert that the written notebook is jupyter and has the correct + // language_info metadata set. 
+ filerTest{t, wf}.assertContentsJupyter(ctx, tc.jupyterName, tc.language) + + // Ensure that the Jupyter notebook is not read when the name does not + // contain the .ipynb extension. + _, err = wf.Stat(ctx, tc.sourceName) + assert.ErrorIs(t, err, fs.ErrNotExist) + _, err = wf.Read(ctx, tc.sourceName) + assert.ErrorIs(t, err, fs.ErrNotExist) + err = wf.Delete(ctx, tc.sourceName) + assert.ErrorIs(t, err, fs.ErrNotExist) + }) + } } diff --git a/internal/helpers.go b/internal/helpers.go index 3bf38775..3e4b4e97 100644 --- a/internal/helpers.go +++ b/internal/helpers.go @@ -352,6 +352,13 @@ func RequireErrorRun(t *testing.T, args ...string) (bytes.Buffer, bytes.Buffer, return stdout, stderr, err } +func readFile(t *testing.T, name string) string { + b, err := os.ReadFile(name) + require.NoError(t, err) + + return string(b) +} + func writeFile(t *testing.T, name string, body string) string { f, err := os.Create(filepath.Join(t.TempDir(), name)) require.NoError(t, err) @@ -562,12 +569,10 @@ func setupLocalFiler(t *testing.T) (filer.Filer, string) { } func setupWsfsFiler(t *testing.T) (filer.Filer, string) { - t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + ctx, wt := acc.WorkspaceTest(t) - ctx := context.Background() - w := databricks.Must(databricks.NewWorkspaceClient()) - tmpdir := TemporaryWorkspaceDir(t, w) - f, err := filer.NewWorkspaceFilesClient(w, tmpdir) + tmpdir := TemporaryWorkspaceDir(t, wt.W) + f, err := filer.NewWorkspaceFilesClient(wt.W, tmpdir) require.NoError(t, err) // Check if we can use this API here, skip test if we cannot. 
@@ -581,11 +586,10 @@ func setupWsfsFiler(t *testing.T) (filer.Filer, string) { } func setupWsfsExtensionsFiler(t *testing.T) (filer.Filer, string) { - t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + _, wt := acc.WorkspaceTest(t) - w := databricks.Must(databricks.NewWorkspaceClient()) - tmpdir := TemporaryWorkspaceDir(t, w) - f, err := filer.NewWorkspaceFilesExtensionsClient(w, tmpdir) + tmpdir := TemporaryWorkspaceDir(t, wt.W) + f, err := filer.NewWorkspaceFilesExtensionsClient(wt.W, tmpdir) require.NoError(t, err) return f, tmpdir diff --git a/internal/testdata/notebooks/py1.ipynb b/internal/testdata/notebooks/py1.ipynb new file mode 100644 index 00000000..0a44ce0e --- /dev/null +++ b/internal/testdata/notebooks/py1.ipynb @@ -0,0 +1,27 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/py2.ipynb b/internal/testdata/notebooks/py2.ipynb new file mode 100644 index 00000000..8b2ccde1 --- /dev/null +++ b/internal/testdata/notebooks/py2.ipynb @@ -0,0 +1,27 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/r1.ipynb b/internal/testdata/notebooks/r1.ipynb new file mode 100644 index 00000000..6280426a --- /dev/null +++ b/internal/testdata/notebooks/r1.ipynb @@ -0,0 +1,25 @@ +{ + "cells": [ + { + "cell_type": "code", + 
"execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "name": "R" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/r2.ipynb b/internal/testdata/notebooks/r2.ipynb new file mode 100644 index 00000000..f2ff413d --- /dev/null +++ b/internal/testdata/notebooks/r2.ipynb @@ -0,0 +1,29 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "print(2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "name": "R" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/scala1.ipynb b/internal/testdata/notebooks/scala1.ipynb new file mode 100644 index 00000000..25a5a187 --- /dev/null +++ b/internal/testdata/notebooks/scala1.ipynb @@ -0,0 +1,38 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "println(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Scala", + "language": "scala", + "name": "scala" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".sc", + "mimetype": "text/x-scala", + "name": "scala", + "nbconvert_exporter": "script", + "version": "2.13.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/scala2.ipynb b/internal/testdata/notebooks/scala2.ipynb new file mode 100644 index 00000000..353fc29f --- /dev/null +++ b/internal/testdata/notebooks/scala2.ipynb @@ -0,0 +1,38 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", 
+ "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "println(2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Scala", + "language": "scala", + "name": "scala" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".sc", + "mimetype": "text/x-scala", + "name": "scala", + "nbconvert_exporter": "script", + "version": "2.13.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/sql1.ipynb b/internal/testdata/notebooks/sql1.ipynb new file mode 100644 index 00000000..7a3562a1 --- /dev/null +++ b/internal/testdata/notebooks/sql1.ipynb @@ -0,0 +1,20 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "select 1" + ] + } + ], + "metadata": { + "language_info": { + "name": "sql" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/sql2.ipynb b/internal/testdata/notebooks/sql2.ipynb new file mode 100644 index 00000000..7780e1da --- /dev/null +++ b/internal/testdata/notebooks/sql2.ipynb @@ -0,0 +1,20 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "select 2" + ] + } + ], + "metadata": { + "language_info": { + "name": "sql" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index b24ecf7e..53b77dd5 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -7,6 +7,7 @@ import ( "io" "io/fs" "path" + "slices" "strings" "github.com/databricks/cli/libs/log" @@ -23,14 +24,6 @@ type workspaceFilesExtensionsClient struct { readonly bool } -var extensionsToLanguages = map[string]workspace.Language{ - ".py": workspace.LanguagePython, - ".r": workspace.LanguageR, - ".scala": workspace.LanguageScala, - ".sql": 
workspace.LanguageSql, - ".ipynb": workspace.LanguagePython, -} - type workspaceFileStatus struct { wsfsFileInfo @@ -54,7 +47,12 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex nameWithoutExt := strings.TrimSuffix(name, ext) // File name does not have an extension associated with Databricks notebooks, return early. - if _, ok := extensionsToLanguages[ext]; !ok { + if !slices.Contains([]string{ + notebook.ExtensionPython, + notebook.ExtensionR, + notebook.ExtensionScala, + notebook.ExtensionSql, + notebook.ExtensionJupyter}, ext) { return nil, nil } @@ -75,22 +73,23 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex return nil, nil } - // Not the correct language. Return early. - if stat.Language != extensionsToLanguages[ext] { - log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), extensionsToLanguages[ext], stat.Language) + // Not the correct language. Return early. Note: All languages are supported + // for Jupyter notebooks. + if ext != notebook.ExtensionJupyter && stat.Language != notebook.ExtensionToLanguage[ext] { + log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), notebook.ExtensionToLanguage[ext], stat.Language) return nil, nil } - // When the extension is .py we expect the export format to be source. + // For non-jupyter notebooks the export format should be source. // If it's not, return early. - if ext == ".py" && stat.ReposExportFormat != workspace.ExportFormatSource { + if ext != notebook.ExtensionJupyter && stat.ReposExportFormat != workspace.ExportFormatSource { log.Debugf(ctx, "attempting to determine if %s could be a notebook. 
Found a notebook at %s but it is not exported as a source notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat) return nil, nil } // When the extension is .ipynb we expect the export format to be Jupyter. // If it's not, return early. - if ext == ".ipynb" && stat.ReposExportFormat != workspace.ExportFormatJupyter { + if ext == notebook.ExtensionJupyter && stat.ReposExportFormat != workspace.ExportFormatJupyter { log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a Jupyter notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat) return nil, nil } @@ -120,8 +119,8 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx con ext := notebook.GetExtensionByLanguage(&stat.ObjectInfo) // If the notebook was exported as a Jupyter notebook, the extension should be .ipynb. - if stat.Language == workspace.LanguagePython && stat.ReposExportFormat == workspace.ExportFormatJupyter { - ext = ".ipynb" + if stat.ReposExportFormat == workspace.ExportFormatJupyter { + ext = notebook.ExtensionJupyter } // Modify the stat object path to include the extension. 
This stat object will be used diff --git a/libs/filer/workspace_files_extensions_client_test.go b/libs/filer/workspace_files_extensions_client_test.go index 321c4371..974a6a37 100644 --- a/libs/filer/workspace_files_extensions_client_test.go +++ b/libs/filer/workspace_files_extensions_client_test.go @@ -37,7 +37,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError string }{ { - name: "python source notebook and file", + name: "python source notebook and file with source extension", language: workspace.LanguagePython, notebookExportFormat: workspace.ExportFormatSource, notebookPath: "/dir/foo", @@ -45,7 +45,31 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue", }, { - name: "python jupyter notebook and file", + name: "scala source notebook and file with source extension", + language: workspace.LanguageScala, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.scala", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.scala resolve to the same name /foo.scala. Changing the name of one of these objects will resolve this issue", + }, + { + name: "r source notebook and file with source extension", + language: workspace.LanguageR, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.r", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.r resolve to the same name /foo.r. 
Changing the name of one of these objects will resolve this issue", + }, + { + name: "sql source notebook and file with source extension", + language: workspace.LanguageSql, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.sql", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.sql resolve to the same name /foo.sql. Changing the name of one of these objects will resolve this issue", + }, + { + name: "python jupyter notebook and file with source extension", language: workspace.LanguagePython, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", @@ -54,37 +78,64 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "", }, { - name: "scala source notebook and file", + name: "scala jupyter notebook and file with source extension", language: workspace.LanguageScala, - notebookExportFormat: workspace.ExportFormatSource, + notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", filePath: "/dir/foo.scala", - expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.scala resolve to the same name /foo.scala. Changing the name of one of these objects will resolve this issue", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", }, { - name: "r source notebook and file", - language: workspace.LanguageR, - notebookExportFormat: workspace.ExportFormatSource, - notebookPath: "/dir/foo", - filePath: "/dir/foo.r", - expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.r resolve to the same name /foo.r. 
Changing the name of one of these objects will resolve this issue", - }, - { - name: "sql source notebook and file", + name: "sql jupyter notebook and file with source extension", language: workspace.LanguageSql, - notebookExportFormat: workspace.ExportFormatSource, + notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", filePath: "/dir/foo.sql", - expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.sql resolve to the same name /foo.sql. Changing the name of one of these objects will resolve this issue", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", }, { - name: "python jupyter notebook and file", + name: "r jupyter notebook and file with source extension", + language: workspace.LanguageR, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.r", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", + }, + { + name: "python jupyter notebook and file with .ipynb extension", language: workspace.LanguagePython, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", filePath: "/dir/foo.ipynb", expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", }, + { + name: "scala jupyter notebook and file with .ipynb extension", + language: workspace.LanguageScala, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. 
Changing the name of one of these objects will resolve this issue", + }, + { + name: "r jupyter notebook and file with .ipynb extension", + language: workspace.LanguageR, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, + { + name: "sql jupyter notebook and file with .ipynb extension", + language: workspace.LanguageSql, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, } { t.Run(tc.name, func(t *testing.T) { mockedWorkspaceClient := mocks.NewMockWorkspaceClient(t) diff --git a/libs/notebook/detect.go b/libs/notebook/detect.go index 582a8847..cd8680bf 100644 --- a/libs/notebook/detect.go +++ b/libs/notebook/detect.go @@ -107,19 +107,19 @@ func DetectWithFS(fsys fs.FS, name string) (notebook bool, language workspace.La // Determine which header to expect based on filename extension. 
ext := strings.ToLower(filepath.Ext(name)) switch ext { - case ".py": + case ExtensionPython: header = `# Databricks notebook source` language = workspace.LanguagePython - case ".r": + case ExtensionR: header = `# Databricks notebook source` language = workspace.LanguageR - case ".scala": + case ExtensionScala: header = "// Databricks notebook source" language = workspace.LanguageScala - case ".sql": + case ExtensionSql: header = "-- Databricks notebook source" language = workspace.LanguageSql - case ".ipynb": + case ExtensionJupyter: return DetectJupyterWithFS(fsys, name) default: return false, "", nil diff --git a/libs/notebook/ext.go b/libs/notebook/ext.go index 28d08c11..c34ad2cc 100644 --- a/libs/notebook/ext.go +++ b/libs/notebook/ext.go @@ -2,22 +2,40 @@ package notebook import "github.com/databricks/databricks-sdk-go/service/workspace" +const ( + ExtensionNone string = "" + ExtensionPython string = ".py" + ExtensionR string = ".r" + ExtensionScala string = ".scala" + ExtensionSql string = ".sql" + ExtensionJupyter string = ".ipynb" +) + +var ExtensionToLanguage = map[string]workspace.Language{ + ExtensionPython: workspace.LanguagePython, + ExtensionR: workspace.LanguageR, + ExtensionScala: workspace.LanguageScala, + ExtensionSql: workspace.LanguageSql, + + // The platform supports all languages (Python, R, Scala, and SQL) for Jupyter notebooks. +} + func GetExtensionByLanguage(objectInfo *workspace.ObjectInfo) string { if objectInfo.ObjectType != workspace.ObjectTypeNotebook { - return "" + return ExtensionNone } switch objectInfo.Language { case workspace.LanguagePython: - return ".py" + return ExtensionPython case workspace.LanguageR: - return ".r" + return ExtensionR case workspace.LanguageScala: - return ".scala" + return ExtensionScala case workspace.LanguageSql: - return ".sql" + return ExtensionSql default: // Do not add any extension to the file name - return "" + return ExtensionNone } }