From e1978fa4293e5f034a2958ddaf7748fdb76ce9a8 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Thu, 14 Nov 2024 03:09:51 +0530 Subject: [PATCH] Add support for non-Python ipynb notebooks to DABs (#1827) ## Changes ### Background The workspace import APIs recently added support for importing Jupyter notebooks written in R, Scala, or SQL, that is non-Python notebooks. This now works for the `/import-file` API which we leverage in the CLI. Note: We do not need any changes in `databricks sync`. It works out of the box because any state mapping of local names to remote names that we store is only scoped to the notebook extension (i.e., `.ipynb` in this case) and is agnostic of the notebook's specific language. ### Problem this PR addresses The extension-aware filer previously did not function because it checks that a `.ipynb` notebook is written in Python. This PR relaxes that constraint and adds integration tests for both the normal workspace filer and extensions aware filer writing and reading non-Python `.ipynb` notebooks. This implies that after this PR DABs in the workspace / CLI from DBR will work for non-Python notebooks as well. non-Python notebooks for DABs deployment from local machines already works after the platform side changes to the API landed, this PR just adds integration tests for that bit of functionality. Note: Any platform side changes we needed for the import API have already been rolled out to production. ### Before DABs deploy would work fine for non-Python notebooks. But DABs deployments from DBR would not. ### After DABs deploys both from local machines and DBR will work fine. ## Testing For creating the `.ipynb` notebook fixtures used in the integration tests I created them directly from the VSCode UI. This ensures high fidelity with how users will create their non-Python notebooks locally. For Python notebooks this is supported out of the box by VSCode but for R and Scala notebooks this requires installing the Jupyter kernel for R and Scala on my local machine and using that from VSCode. For SQL, I ended up directly modifying the `language_info` field in the Jupyter metadata to create the test fixture. ### Discussion: Issues with configuring language at the cell level The language metadata for a Jupyter notebook is standardized at the notebook level (in the `language_info` field). Unfortunately, it's not standardized at the cell level. Thus, for example, if a user changes the language for their cell in VSCode (which is supported by the standard Jupyter VSCode integration), it'll cause a runtime error when the user actually attempts to run the notebook. This is because the cell-level metadata is encoded in a format specific to VSCode: ``` cells: []{ "vscode": { "languageId": "sql" } } ``` Supporting cell level languages is thus out of scope for this PR and can be revisited along with the workspace files team if there's strong customer interest. --- internal/filer_test.go | 504 ++++++++++-------- internal/helpers.go | 22 +- internal/testdata/notebooks/py1.ipynb | 27 + internal/testdata/notebooks/py2.ipynb | 27 + internal/testdata/notebooks/r1.ipynb | 25 + internal/testdata/notebooks/r2.ipynb | 29 + internal/testdata/notebooks/scala1.ipynb | 38 ++ internal/testdata/notebooks/scala2.ipynb | 38 ++ internal/testdata/notebooks/sql1.ipynb | 20 + internal/testdata/notebooks/sql2.ipynb | 20 + .../workspace_files_extensions_client.go | 33 +- .../workspace_files_extensions_client_test.go | 85 ++- libs/notebook/detect.go | 10 +- libs/notebook/ext.go | 30 +- 14 files changed, 637 insertions(+), 271 deletions(-) create mode 100644 internal/testdata/notebooks/py1.ipynb create mode 100644 internal/testdata/notebooks/py2.ipynb create mode 100644 internal/testdata/notebooks/r1.ipynb create mode 100644 internal/testdata/notebooks/r2.ipynb create mode 100644 internal/testdata/notebooks/scala1.ipynb create mode 100644 internal/testdata/notebooks/scala2.ipynb create mode 100644 internal/testdata/notebooks/sql1.ipynb create mode 100644 internal/testdata/notebooks/sql2.ipynb diff --git a/internal/filer_test.go b/internal/filer_test.go index bc4c9480..20207d34 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -39,7 +39,7 @@ func (f filerTest) assertContents(ctx context.Context, name string, contents str assert.Equal(f, contents, body.String()) } -func (f filerTest) assertContentsJupyter(ctx context.Context, name string) { +func (f filerTest) assertContentsJupyter(ctx context.Context, name string, language string) { reader, err := f.Read(ctx, name) if !assert.NoError(f, err) { return @@ -62,6 +62,7 @@ func (f filerTest) assertContentsJupyter(ctx context.Context, name string) { // Since a roundtrip to the workspace changes a Jupyter notebook's payload, // the best we can do is assert that the nbformat is correct. assert.EqualValues(f, 4, actual["nbformat"]) + assert.Equal(f, language, actual["metadata"].(map[string]any)["language_info"].(map[string]any)["name"]) } func (f filerTest) assertNotExists(ctx context.Context, name string) { @@ -360,146 +361,114 @@ func TestAccFilerReadDir(t *testing.T) { } } -var jupyterNotebookContent1 = ` -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Jupyter Notebook Version 1\")" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 - } -` - -var jupyterNotebookContent2 = ` -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(\"Jupyter Notebook Version 2\")" - ] - } - ], - "metadata": { - "language_info": { - "name": "python" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 - } -` - -func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { +func TestAccFilerWorkspaceNotebook(t *testing.T) { t.Parallel() - f, _ := setupWsfsFiler(t) ctx := context.Background() var err error - // Upload the notebooks - err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) - require.NoError(t, err) - err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) - require.NoError(t, err) - err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"first upload\"")) - require.NoError(t, err) - err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))")) - require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1)) - require.NoError(t, err) + tcases := []struct { + name string + nameWithoutExt string + content1 string + expected1 string + content2 string + expected2 string + }{ + { + name: "pyNb.py", + nameWithoutExt: "pyNb", + content1: "# Databricks notebook source\nprint('first upload')", + expected1: "# Databricks notebook source\nprint('first upload')", + content2: "# Databricks notebook source\nprint('second upload')", + expected2: "# Databricks notebook source\nprint('second upload')", + }, + { + name: "rNb.r", + nameWithoutExt: "rNb", + content1: "# Databricks notebook source\nprint('first upload')", + expected1: "# Databricks notebook source\nprint('first upload')", + content2: "# Databricks notebook source\nprint('second upload')", + expected2: "# Databricks notebook source\nprint('second upload')", + }, + { + name: "sqlNb.sql", + nameWithoutExt: "sqlNb", + content1: "-- Databricks notebook source\n SELECT \"first upload\"", + expected1: "-- Databricks notebook source\n SELECT \"first upload\"", + content2: "-- Databricks notebook source\n SELECT \"second upload\"", + expected2: "-- Databricks notebook source\n SELECT \"second upload\"", + }, + { + name: "scalaNb.scala", + nameWithoutExt: "scalaNb", + content1: "// Databricks notebook source\n println(\"first upload\")", + expected1: "// Databricks notebook source\n println(\"first upload\")", + content2: "// Databricks notebook source\n println(\"second upload\")", + expected2: "// Databricks notebook source\n println(\"second upload\")", + }, + { + name: "pythonJupyterNb.ipynb", + nameWithoutExt: "pythonJupyterNb", + content1: readFile(t, "testdata/notebooks/py1.ipynb"), + expected1: "# Databricks notebook source\nprint(1)", + content2: readFile(t, "testdata/notebooks/py2.ipynb"), + expected2: "# Databricks notebook source\nprint(2)", + }, + { + name: "rJupyterNb.ipynb", + nameWithoutExt: "rJupyterNb", + content1: readFile(t, "testdata/notebooks/r1.ipynb"), + expected1: "# Databricks notebook source\nprint(1)", + content2: readFile(t, "testdata/notebooks/r2.ipynb"), + expected2: "# Databricks notebook source\nprint(2)", + }, + { + name: "scalaJupyterNb.ipynb", + nameWithoutExt: "scalaJupyterNb", + content1: readFile(t, "testdata/notebooks/scala1.ipynb"), + expected1: "// Databricks notebook source\nprintln(1)", + content2: readFile(t, "testdata/notebooks/scala2.ipynb"), + expected2: "// Databricks notebook source\nprintln(2)", + }, + { + name: "sqlJupyterNotebook.ipynb", + nameWithoutExt: "sqlJupyterNotebook", + content1: readFile(t, "testdata/notebooks/sql1.ipynb"), + expected1: "-- Databricks notebook source\nselect 1", + content2: readFile(t, "testdata/notebooks/sql2.ipynb"), + expected2: "-- Databricks notebook source\nselect 2", + }, + } - // Assert contents after initial upload - filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('first upload'))") - filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))") - filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"") - filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))") - filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")") + for _, tc := range tcases { + f, _ := setupWsfsFiler(t) + t.Run(tc.name, func(t *testing.T) { + t.Parallel() - // Assert uploading a second time fails due to overwrite mode missing - err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))")) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/pyNb$`), err.Error()) + // Upload the notebook + err = f.Write(ctx, tc.name, strings.NewReader(tc.content1)) + require.NoError(t, err) - err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('second upload'))")) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/rNb$`), err.Error()) + // Assert contents after initial upload. Note that we expect the content + // for jupyter notebooks to be of type source because the workspace files + // client always uses the source format to read notebooks from the workspace. + filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected1) - err = f.Write(ctx, "sqlNb.sql", strings.NewReader("# Databricks notebook source\n SELECT \"second upload\")")) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/sqlNb$`), err.Error()) + // Assert uploading a second time fails due to overwrite mode missing + err = f.Write(ctx, tc.name, strings.NewReader(tc.content2)) + assert.ErrorIs(t, err, fs.ErrExist) + assert.Regexp(t, regexp.MustCompile(`file already exists: .*/`+tc.nameWithoutExt+`$`), err.Error()) - err = f.Write(ctx, "scalaNb.scala", strings.NewReader("# Databricks notebook source\n println(\"second upload\"))")) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/scalaNb$`), err.Error()) + // Try uploading the notebook again with overwrite flag. This time it should succeed. + err = f.Write(ctx, tc.name, strings.NewReader(tc.content2), filer.OverwriteIfExists) + require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2)) - assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/jupyterNb$`), err.Error()) -} + // Assert contents after second upload + filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected2) + }) + } -func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { - t.Parallel() - - f, _ := setupWsfsFiler(t) - ctx := context.Background() - var err error - - // Upload notebooks - err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) - require.NoError(t, err) - err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('first upload'))")) - require.NoError(t, err) - err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"first upload\"")) - require.NoError(t, err) - err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))")) - require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1)) - require.NoError(t, err) - - // Assert contents after initial upload - filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('first upload'))") - filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))") - filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"") - filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))") - filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")") - - // Upload notebooks a second time, overwriting the initial uplaods - err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))"), filer.OverwriteIfExists) - require.NoError(t, err) - err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('second upload'))"), filer.OverwriteIfExists) - require.NoError(t, err) - err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"second upload\""), filer.OverwriteIfExists) - require.NoError(t, err) - err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"second upload\"))"), filer.OverwriteIfExists) - require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2), filer.OverwriteIfExists) - require.NoError(t, err) - - // Assert contents have been overwritten - filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('second upload'))") - filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('second upload'))") - filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"second upload\"") - filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"second upload\"))") - filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 2\")") } func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { @@ -515,11 +484,13 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { {"foo.r", "print('foo')"}, {"foo.scala", "println('foo')"}, {"foo.sql", "SELECT 'foo'"}, - {"jupyterNb.ipynb", jupyterNotebookContent1}, - {"jupyterNb2.ipynb", jupyterNotebookContent2}, + {"py1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")}, {"pyNb.py", "# Databricks notebook source\nprint('first upload'))"}, + {"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")}, {"rNb.r", "# Databricks notebook source\nprint('first upload'))"}, + {"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")}, {"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"}, + {"sql1.ipynb", readFile(t, "testdata/notebooks/sql1.ipynb")}, {"sqlNb.sql", "-- Databricks notebook source\n SELECT \"first upload\""}, } @@ -554,11 +525,13 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { "foo.r", "foo.scala", "foo.sql", - "jupyterNb.ipynb", - "jupyterNb2.ipynb", + "py1.ipynb", "pyNb.py", + "r1.ipynb", "rNb.r", + "scala1.ipynb", "scalaNb.scala", + "sql1.ipynb", "sqlNb.sql", }, names) @@ -582,7 +555,10 @@ func setupFilerWithExtensionsTest(t *testing.T) filer.Filer { }{ {"foo.py", "# Databricks notebook source\nprint('first upload'))"}, {"bar.py", "print('foo')"}, - {"jupyter.ipynb", jupyterNotebookContent1}, + {"p1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")}, + {"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")}, + {"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")}, + {"sql1.ipynb", readFile(t, "testdata/notebooks/sql1.ipynb")}, {"pretender", "not a notebook"}, {"dir/file.txt", "file content"}, {"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"}, @@ -608,11 +584,15 @@ func TestAccFilerWorkspaceFilesExtensionsRead(t *testing.T) { // Read contents of test fixtures as a sanity check. filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('first upload'))") filerTest{t, wf}.assertContents(ctx, "bar.py", "print('foo')") - filerTest{t, wf}.assertContentsJupyter(ctx, "jupyter.ipynb") filerTest{t, wf}.assertContents(ctx, "dir/file.txt", "file content") filerTest{t, wf}.assertContents(ctx, "scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')") filerTest{t, wf}.assertContents(ctx, "pretender", "not a notebook") + filerTest{t, wf}.assertContentsJupyter(ctx, "p1.ipynb", "python") + filerTest{t, wf}.assertContentsJupyter(ctx, "r1.ipynb", "r") + filerTest{t, wf}.assertContentsJupyter(ctx, "scala1.ipynb", "scala") + filerTest{t, wf}.assertContentsJupyter(ctx, "sql1.ipynb", "sql") + // Read non-existent file _, err := wf.Read(ctx, "non-existent.py") assert.ErrorIs(t, err, fs.ErrNotExist) @@ -638,35 +618,41 @@ func TestAccFilerWorkspaceFilesExtensionsDelete(t *testing.T) { ctx := context.Background() wf := setupFilerWithExtensionsTest(t) - // Delete notebook - err := wf.Delete(ctx, "foo.py") - require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "foo.py") + for _, fileName := range []string{ + // notebook + "foo.py", + // file + "bar.py", + // python jupyter notebook + "p1.ipynb", + // R jupyter notebook + "r1.ipynb", + // Scala jupyter notebook + "scala1.ipynb", + // SQL jupyter notebook + "sql1.ipynb", + } { + err := wf.Delete(ctx, fileName) + require.NoError(t, err) + filerTest{t, wf}.assertNotExists(ctx, fileName) + } - // Delete file - err = wf.Delete(ctx, "bar.py") - require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "bar.py") - - // Delete jupyter notebook - err = wf.Delete(ctx, "jupyter.ipynb") - require.NoError(t, err) - filerTest{t, wf}.assertNotExists(ctx, "jupyter.ipynb") - - // Delete non-existent file - err = wf.Delete(ctx, "non-existent.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - // Ensure we do not delete a file as a notebook - err = wf.Delete(ctx, "pretender.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - // Ensure we do not delete a Scala notebook as a Python notebook - _, err = wf.Read(ctx, "scala-notebook.py") - assert.ErrorIs(t, err, fs.ErrNotExist) + for _, fileName := range []string{ + // do not delete non-existent file + "non-existent.py", + // do not delete a file assuming it is a notebook and stripping the extension + "pretender.py", + // do not delete a Scala notebook as a Python notebook + "scala-notebook.py", + // do not delete a file assuming it is a Jupyter notebook and stripping the extension + "pretender.ipynb", + } { + err := wf.Delete(ctx, fileName) + assert.ErrorIs(t, err, fs.ErrNotExist) + } // Delete directory - err = wf.Delete(ctx, "dir") + err := wf.Delete(ctx, "dir") assert.ErrorIs(t, err, fs.ErrInvalid) // Delete directory recursively @@ -681,44 +667,45 @@ func TestAccFilerWorkspaceFilesExtensionsStat(t *testing.T) { ctx := context.Background() wf := setupFilerWithExtensionsTest(t) - // Stat on a notebook - info, err := wf.Stat(ctx, "foo.py") - require.NoError(t, err) - assert.Equal(t, "foo.py", info.Name()) - assert.False(t, info.IsDir()) - - // Stat on a file - info, err = wf.Stat(ctx, "bar.py") - require.NoError(t, err) - assert.Equal(t, "bar.py", info.Name()) - assert.False(t, info.IsDir()) - - // Stat on a Jupyter notebook - info, err = wf.Stat(ctx, "jupyter.ipynb") - require.NoError(t, err) - assert.Equal(t, "jupyter.ipynb", info.Name()) - assert.False(t, info.IsDir()) + for _, fileName := range []string{ + // notebook + "foo.py", + // file + "bar.py", + // python jupyter notebook + "p1.ipynb", + // R jupyter notebook + "r1.ipynb", + // Scala jupyter notebook + "scala1.ipynb", + // SQL jupyter notebook + "sql1.ipynb", + } { + info, err := wf.Stat(ctx, fileName) + require.NoError(t, err) + assert.Equal(t, fileName, info.Name()) + assert.False(t, info.IsDir()) + } // Stat on a directory - info, err = wf.Stat(ctx, "dir") + info, err := wf.Stat(ctx, "dir") require.NoError(t, err) assert.Equal(t, "dir", info.Name()) assert.True(t, info.IsDir()) - // Stat on a non-existent file - _, err = wf.Stat(ctx, "non-existent.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - // Ensure we do not stat a file as a notebook - _, err = wf.Stat(ctx, "pretender.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - // Ensure we do not stat a Scala notebook as a Python notebook - _, err = wf.Stat(ctx, "scala-notebook.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - - _, err = wf.Stat(ctx, "pretender.ipynb") - assert.ErrorIs(t, err, fs.ErrNotExist) + for _, fileName := range []string{ + // non-existent file + "non-existent.py", + // do not stat a file assuming it is a notebook and stripping the extension + "pretender.py", + // do not stat a Scala notebook as a Python notebook + "scala-notebook.py", + // do not read a regular file assuming it is a Jupyter notebook and stripping the extension + "pretender.ipynb", + } { + _, err := wf.Stat(ctx, fileName) + assert.ErrorIs(t, err, fs.ErrNotExist) + } } func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) { @@ -739,32 +726,115 @@ func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) { func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { t.Parallel() - ctx := context.Background() - wf, _ := setupWsfsExtensionsFiler(t) + // Case 1: Writing source notebooks. + for _, tc := range []struct { + language string + sourceName string + sourceContent string + jupyterName string + jupyterContent string + }{ + { + language: "python", + sourceName: "foo.py", + sourceContent: "# Databricks notebook source\nprint('foo')", + jupyterName: "foo.ipynb", + }, + { + language: "r", + sourceName: "foo.r", + sourceContent: "# Databricks notebook source\nprint('foo')", + jupyterName: "foo.ipynb", + }, + { + language: "scala", + sourceName: "foo.scala", + sourceContent: "// Databricks notebook source\nprintln('foo')", + jupyterName: "foo.ipynb", + }, + { + language: "sql", + sourceName: "foo.sql", + sourceContent: "-- Databricks notebook source\nselect 'foo'", + jupyterName: "foo.ipynb", + }, + } { + t.Run("source_"+tc.language, func(t *testing.T) { + t.Parallel() - // Case 1: Source Notebook - err := wf.Write(ctx, "foo.py", strings.NewReader("# Databricks notebook source\nprint('foo')")) - require.NoError(t, err) + ctx := context.Background() + wf, _ := setupWsfsExtensionsFiler(t) - // The source notebook should exist but not the Jupyter notebook - filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('foo')") - _, err = wf.Stat(ctx, "foo.ipynb") - assert.ErrorIs(t, err, fs.ErrNotExist) - _, err = wf.Read(ctx, "foo.ipynb") - assert.ErrorIs(t, err, fs.ErrNotExist) - err = wf.Delete(ctx, "foo.ipynb") - assert.ErrorIs(t, err, fs.ErrNotExist) + err := wf.Write(ctx, tc.sourceName, strings.NewReader(tc.sourceContent)) + require.NoError(t, err) - // Case 2: Jupyter Notebook - err = wf.Write(ctx, "bar.ipynb", strings.NewReader(jupyterNotebookContent1)) - require.NoError(t, err) + // Assert on the content of the source notebook that's been written. + filerTest{t, wf}.assertContents(ctx, tc.sourceName, tc.sourceContent) - // The Jupyter notebook should exist but not the source notebook - filerTest{t, wf}.assertContentsJupyter(ctx, "bar.ipynb") - _, err = wf.Stat(ctx, "bar.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - _, err = wf.Read(ctx, "bar.py") - assert.ErrorIs(t, err, fs.ErrNotExist) - err = wf.Delete(ctx, "bar.py") - assert.ErrorIs(t, err, fs.ErrNotExist) + // Ensure that the source notebook is not read when the name contains + // the .ipynb extension. + _, err = wf.Stat(ctx, tc.jupyterName) + assert.ErrorIs(t, err, fs.ErrNotExist) + _, err = wf.Read(ctx, tc.jupyterName) + assert.ErrorIs(t, err, fs.ErrNotExist) + err = wf.Delete(ctx, tc.jupyterName) + assert.ErrorIs(t, err, fs.ErrNotExist) + }) + } + + // Case 2: Writing Jupyter notebooks. + for _, tc := range []struct { + language string + sourceName string + jupyterName string + jupyterContent string + }{ + { + language: "python", + sourceName: "foo.py", + jupyterName: "foo.ipynb", + jupyterContent: readFile(t, "testdata/notebooks/py1.ipynb"), + }, + { + language: "r", + sourceName: "foo.r", + jupyterName: "foo.ipynb", + jupyterContent: readFile(t, "testdata/notebooks/r1.ipynb"), + }, + { + language: "scala", + sourceName: "foo.scala", + jupyterName: "foo.ipynb", + jupyterContent: readFile(t, "testdata/notebooks/scala1.ipynb"), + }, + { + language: "sql", + sourceName: "foo.sql", + jupyterName: "foo.ipynb", + jupyterContent: readFile(t, "testdata/notebooks/sql1.ipynb"), + }, + } { + t.Run("jupyter_"+tc.language, func(t *testing.T) { + t.Parallel() + + ctx := context.Background() + wf, _ := setupWsfsExtensionsFiler(t) + + err := wf.Write(ctx, tc.jupyterName, strings.NewReader(tc.jupyterContent)) + require.NoError(t, err) + + // Assert that the written notebook is jupyter and has the correct + // language_info metadata set. + filerTest{t, wf}.assertContentsJupyter(ctx, tc.jupyterName, tc.language) + + // Ensure that the Jupyter notebook is not read when the name does not + // contain the .ipynb extension. + _, err = wf.Stat(ctx, tc.sourceName) + assert.ErrorIs(t, err, fs.ErrNotExist) + _, err = wf.Read(ctx, tc.sourceName) + assert.ErrorIs(t, err, fs.ErrNotExist) + err = wf.Delete(ctx, tc.sourceName) + assert.ErrorIs(t, err, fs.ErrNotExist) + }) + } } diff --git a/internal/helpers.go b/internal/helpers.go index 3bf38775..3e4b4e97 100644 --- a/internal/helpers.go +++ b/internal/helpers.go @@ -352,6 +352,13 @@ func RequireErrorRun(t *testing.T, args ...string) (bytes.Buffer, bytes.Buffer, return stdout, stderr, err } +func readFile(t *testing.T, name string) string { + b, err := os.ReadFile(name) + require.NoError(t, err) + + return string(b) +} + func writeFile(t *testing.T, name string, body string) string { f, err := os.Create(filepath.Join(t.TempDir(), name)) require.NoError(t, err) @@ -562,12 +569,10 @@ func setupLocalFiler(t *testing.T) (filer.Filer, string) { } func setupWsfsFiler(t *testing.T) (filer.Filer, string) { - t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + ctx, wt := acc.WorkspaceTest(t) - ctx := context.Background() - w := databricks.Must(databricks.NewWorkspaceClient()) - tmpdir := TemporaryWorkspaceDir(t, w) - f, err := filer.NewWorkspaceFilesClient(w, tmpdir) + tmpdir := TemporaryWorkspaceDir(t, wt.W) + f, err := filer.NewWorkspaceFilesClient(wt.W, tmpdir) require.NoError(t, err) // Check if we can use this API here, skip test if we cannot. @@ -581,11 +586,10 @@ func setupWsfsFiler(t *testing.T) (filer.Filer, string) { } func setupWsfsExtensionsFiler(t *testing.T) (filer.Filer, string) { - t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + _, wt := acc.WorkspaceTest(t) - w := databricks.Must(databricks.NewWorkspaceClient()) - tmpdir := TemporaryWorkspaceDir(t, w) - f, err := filer.NewWorkspaceFilesExtensionsClient(w, tmpdir) + tmpdir := TemporaryWorkspaceDir(t, wt.W) + f, err := filer.NewWorkspaceFilesExtensionsClient(wt.W, tmpdir) require.NoError(t, err) return f, tmpdir diff --git a/internal/testdata/notebooks/py1.ipynb b/internal/testdata/notebooks/py1.ipynb new file mode 100644 index 00000000..0a44ce0e --- /dev/null +++ b/internal/testdata/notebooks/py1.ipynb @@ -0,0 +1,27 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/py2.ipynb b/internal/testdata/notebooks/py2.ipynb new file mode 100644 index 00000000..8b2ccde1 --- /dev/null +++ b/internal/testdata/notebooks/py2.ipynb @@ -0,0 +1,27 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.8.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/r1.ipynb b/internal/testdata/notebooks/r1.ipynb new file mode 100644 index 00000000..6280426a --- /dev/null +++ b/internal/testdata/notebooks/r1.ipynb @@ -0,0 +1,25 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "name": "R" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/r2.ipynb b/internal/testdata/notebooks/r2.ipynb new file mode 100644 index 00000000..f2ff413d --- /dev/null +++ b/internal/testdata/notebooks/r2.ipynb @@ -0,0 +1,29 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "r" + } + }, + "outputs": [], + "source": [ + "print(2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "R", + "language": "R", + "name": "ir" + }, + "language_info": { + "name": "R" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/scala1.ipynb b/internal/testdata/notebooks/scala1.ipynb new file mode 100644 index 00000000..25a5a187 --- /dev/null +++ b/internal/testdata/notebooks/scala1.ipynb @@ -0,0 +1,38 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "println(1)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Scala", + "language": "scala", + "name": "scala" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".sc", + "mimetype": "text/x-scala", + "name": "scala", + "nbconvert_exporter": "script", + "version": "2.13.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/scala2.ipynb b/internal/testdata/notebooks/scala2.ipynb new file mode 100644 index 00000000..353fc29f --- /dev/null +++ b/internal/testdata/notebooks/scala2.ipynb @@ -0,0 +1,38 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "println(2)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Scala", + "language": "scala", + "name": "scala" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".sc", + "mimetype": "text/x-scala", + "name": "scala", + "nbconvert_exporter": "script", + "version": "2.13.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/sql1.ipynb b/internal/testdata/notebooks/sql1.ipynb new file mode 100644 index 00000000..7a3562a1 --- /dev/null +++ b/internal/testdata/notebooks/sql1.ipynb @@ -0,0 +1,20 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "select 1" + ] + } + ], + "metadata": { + "language_info": { + "name": "sql" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/internal/testdata/notebooks/sql2.ipynb b/internal/testdata/notebooks/sql2.ipynb new file mode 100644 index 00000000..7780e1da --- /dev/null +++ b/internal/testdata/notebooks/sql2.ipynb @@ -0,0 +1,20 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "select 2" + ] + } + ], + "metadata": { + "language_info": { + "name": "sql" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index b24ecf7e..53b77dd5 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -7,6 +7,7 @@ import ( "io" "io/fs" "path" + "slices" "strings" "github.com/databricks/cli/libs/log" @@ -23,14 +24,6 @@ type workspaceFilesExtensionsClient struct { readonly bool } -var extensionsToLanguages = map[string]workspace.Language{ - ".py": workspace.LanguagePython, - ".r": workspace.LanguageR, - ".scala": workspace.LanguageScala, - ".sql": workspace.LanguageSql, - ".ipynb": workspace.LanguagePython, -} - type workspaceFileStatus struct { wsfsFileInfo @@ -54,7 +47,12 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex nameWithoutExt := strings.TrimSuffix(name, ext) // File name does not have an extension associated with Databricks notebooks, return early. - if _, ok := extensionsToLanguages[ext]; !ok { + if !slices.Contains([]string{ + notebook.ExtensionPython, + notebook.ExtensionR, + notebook.ExtensionScala, + notebook.ExtensionSql, + notebook.ExtensionJupyter}, ext) { return nil, nil } @@ -75,22 +73,23 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex return nil, nil } - // Not the correct language. Return early. - if stat.Language != extensionsToLanguages[ext] { - log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), extensionsToLanguages[ext], stat.Language) + // Not the correct language. Return early. Note: All languages are supported + // for Jupyter notebooks. + if ext != notebook.ExtensionJupyter && stat.Language != notebook.ExtensionToLanguage[ext] { + log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), notebook.ExtensionToLanguage[ext], stat.Language) return nil, nil } - // When the extension is .py we expect the export format to be source. + // For non-jupyter notebooks the export format should be source. // If it's not, return early. - if ext == ".py" && stat.ReposExportFormat != workspace.ExportFormatSource { + if ext != notebook.ExtensionJupyter && stat.ReposExportFormat != workspace.ExportFormatSource { log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a source notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat) return nil, nil } // When the extension is .ipynb we expect the export format to be Jupyter. // If it's not, return early. - if ext == ".ipynb" && stat.ReposExportFormat != workspace.ExportFormatJupyter { + if ext == notebook.ExtensionJupyter && stat.ReposExportFormat != workspace.ExportFormatJupyter { log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a Jupyter notebook. Its export format is %s.", name, path.Join(w.root, nameWithoutExt), stat.ReposExportFormat) return nil, nil } @@ -120,8 +119,8 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx con ext := notebook.GetExtensionByLanguage(&stat.ObjectInfo) // If the notebook was exported as a Jupyter notebook, the extension should be .ipynb. - if stat.Language == workspace.LanguagePython && stat.ReposExportFormat == workspace.ExportFormatJupyter { - ext = ".ipynb" + if stat.ReposExportFormat == workspace.ExportFormatJupyter { + ext = notebook.ExtensionJupyter } // Modify the stat object path to include the extension. This stat object will be used diff --git a/libs/filer/workspace_files_extensions_client_test.go b/libs/filer/workspace_files_extensions_client_test.go index 321c4371..974a6a37 100644 --- a/libs/filer/workspace_files_extensions_client_test.go +++ b/libs/filer/workspace_files_extensions_client_test.go @@ -37,7 +37,7 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError string }{ { - name: "python source notebook and file", + name: "python source notebook and file with source extension", language: workspace.LanguagePython, notebookExportFormat: workspace.ExportFormatSource, notebookPath: "/dir/foo", @@ -45,7 +45,31 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue", }, { - name: "python jupyter notebook and file", + name: "scala source notebook and file with source extension", + language: workspace.LanguageScala, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.scala", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.scala resolve to the same name /foo.scala. Changing the name of one of these objects will resolve this issue", + }, + { + name: "r source notebook and file with source extension", + language: workspace.LanguageR, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.r", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.r resolve to the same name /foo.r. Changing the name of one of these objects will resolve this issue", + }, + { + name: "sql source notebook and file with source extension", + language: workspace.LanguageSql, + notebookExportFormat: workspace.ExportFormatSource, + notebookPath: "/dir/foo", + filePath: "/dir/foo.sql", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.sql resolve to the same name /foo.sql. Changing the name of one of these objects will resolve this issue", + }, + { + name: "python jupyter notebook and file with source extension", language: workspace.LanguagePython, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", @@ -54,37 +78,64 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { expectedError: "", }, { - name: "scala source notebook and file", + name: "scala jupyter notebook and file with source extension", language: workspace.LanguageScala, - notebookExportFormat: workspace.ExportFormatSource, + notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", filePath: "/dir/foo.scala", - expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.scala resolve to the same name /foo.scala. Changing the name of one of these objects will resolve this issue", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", }, { - name: "r source notebook and file", - language: workspace.LanguageR, - notebookExportFormat: workspace.ExportFormatSource, - notebookPath: "/dir/foo", - filePath: "/dir/foo.r", - expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.r resolve to the same name /foo.r. Changing the name of one of these objects will resolve this issue", - }, - { - name: "sql source notebook and file", + name: "sql jupyter notebook and file with source extension", language: workspace.LanguageSql, - notebookExportFormat: workspace.ExportFormatSource, + notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", filePath: "/dir/foo.sql", - expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.sql resolve to the same name /foo.sql. Changing the name of one of these objects will resolve this issue", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", }, { - name: "python jupyter notebook and file", + name: "r jupyter notebook and file with source extension", + language: workspace.LanguageR, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.sql", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", + }, + { + name: "python jupyter notebook and file with .ipynb extension", language: workspace.LanguagePython, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", filePath: "/dir/foo.ipynb", expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", }, + { + name: "scala jupyter notebook and file with .ipynb extension", + language: workspace.LanguageScala, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, + { + name: "r jupyter notebook and file with .ipynb extension", + language: workspace.LanguageR, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, + { + name: "sql jupyter notebook and file with .ipynb extension", + language: workspace.LanguageSql, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, } { t.Run(tc.name, func(t *testing.T) { mockedWorkspaceClient := mocks.NewMockWorkspaceClient(t) diff --git a/libs/notebook/detect.go b/libs/notebook/detect.go index 582a8847..cd8680bf 100644 --- a/libs/notebook/detect.go +++ b/libs/notebook/detect.go @@ -107,19 +107,19 @@ func DetectWithFS(fsys fs.FS, name string) (notebook bool, language workspace.La // Determine which header to expect based on filename extension. ext := strings.ToLower(filepath.Ext(name)) switch ext { - case ".py": + case ExtensionPython: header = `# Databricks notebook source` language = workspace.LanguagePython - case ".r": + case ExtensionR: header = `# Databricks notebook source` language = workspace.LanguageR - case ".scala": + case ExtensionScala: header = "// Databricks notebook source" language = workspace.LanguageScala - case ".sql": + case ExtensionSql: header = "-- Databricks notebook source" language = workspace.LanguageSql - case ".ipynb": + case ExtensionJupyter: return DetectJupyterWithFS(fsys, name) default: return false, "", nil diff --git a/libs/notebook/ext.go b/libs/notebook/ext.go index 28d08c11..c34ad2cc 100644 --- a/libs/notebook/ext.go +++ b/libs/notebook/ext.go @@ -2,22 +2,40 @@ package notebook import "github.com/databricks/databricks-sdk-go/service/workspace" +const ( + ExtensionNone string = "" + ExtensionPython string = ".py" + ExtensionR string = ".r" + ExtensionScala string = ".scala" + ExtensionSql string = ".sql" + ExtensionJupyter string = ".ipynb" +) + +var ExtensionToLanguage = map[string]workspace.Language{ + ExtensionPython: workspace.LanguagePython, + ExtensionR: workspace.LanguageR, + ExtensionScala: workspace.LanguageScala, + ExtensionSql: workspace.LanguageSql, + + // The platform supports all languages (Python, R, Scala, and SQL) for Jupyter notebooks. +} + func GetExtensionByLanguage(objectInfo *workspace.ObjectInfo) string { if objectInfo.ObjectType != workspace.ObjectTypeNotebook { - return "" + return ExtensionNone } switch objectInfo.Language { case workspace.LanguagePython: - return ".py" + return ExtensionPython case workspace.LanguageR: - return ".r" + return ExtensionR case workspace.LanguageScala: - return ".scala" + return ExtensionScala case workspace.LanguageSql: - return ".sql" + return ExtensionSql default: // Do not add any extension to the file name - return "" + return ExtensionNone } }