diff --git a/internal/filer_test.go b/internal/filer_test.go index bc4c9480..04db494a 100644 --- a/internal/filer_test.go +++ b/internal/filer_test.go @@ -360,7 +360,7 @@ func TestAccFilerReadDir(t *testing.T) { } } -var jupyterNotebookContent1 = ` +var pythonJupyterNotebookContent1 = ` { "cells": [ { @@ -384,7 +384,10 @@ var jupyterNotebookContent1 = ` } ` -var jupyterNotebookContent2 = ` +// TODO: Does detect notebook work with notebooks exported from databricks? +// They typically do not have the top level "language" key set. + +var pythonJupyterNotebookContent2 = ` { "cells": [ { @@ -408,6 +411,10 @@ var jupyterNotebookContent2 = ` } ` +// TODO: Continue adding tests for other types of notebooks than python here. +// Exporting from a workspace makes this work easier. + + func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { t.Parallel() @@ -424,7 +431,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { require.NoError(t, err) err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))")) require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1)) + err = f.Write(ctx, "pythonJupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent1)) require.NoError(t, err) // Assert contents after initial upload @@ -432,7 +439,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))") filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"") filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))") - filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")") + filerTest{t, f}.assertContents(ctx, "pythonJupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")") // Assert uploading a second time fails due to overwrite mode missing err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))")) @@ -451,9 +458,9 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) { assert.ErrorIs(t, err, fs.ErrExist) assert.Regexp(t, regexp.MustCompile(`file already exists: .*/scalaNb$`), err.Error()) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2)) + err = f.Write(ctx, "pythonJupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent2)) assert.ErrorIs(t, err, fs.ErrExist) - assert.Regexp(t, regexp.MustCompile(`file already exists: .*/jupyterNb$`), err.Error()) + assert.Regexp(t, regexp.MustCompile(`file already exists: .*/pythonJupyterNb$`), err.Error()) } func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { @@ -472,7 +479,7 @@ func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { require.NoError(t, err) err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))")) require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1)) + err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent1)) require.NoError(t, err) // Assert contents after initial upload @@ -491,7 +498,7 @@ func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) { require.NoError(t, err) err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"second upload\"))"), filer.OverwriteIfExists) require.NoError(t, err) - err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2), filer.OverwriteIfExists) + err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent2), filer.OverwriteIfExists) require.NoError(t, err) // Assert contents have been overwritten @@ -515,8 +522,8 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) { {"foo.r", "print('foo')"}, {"foo.scala", "println('foo')"}, {"foo.sql", "SELECT 'foo'"}, - {"jupyterNb.ipynb", jupyterNotebookContent1}, - {"jupyterNb2.ipynb", jupyterNotebookContent2}, + {"jupyterNb.ipynb", pythonJupyterNotebookContent1}, + {"jupyterNb2.ipynb", pythonJupyterNotebookContent2}, {"pyNb.py", "# Databricks notebook source\nprint('first upload'))"}, {"rNb.r", "# Databricks notebook source\nprint('first upload'))"}, {"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"}, @@ -582,7 +589,7 @@ func setupFilerWithExtensionsTest(t *testing.T) filer.Filer { }{ {"foo.py", "# Databricks notebook source\nprint('first upload'))"}, {"bar.py", "print('foo')"}, - {"jupyter.ipynb", jupyterNotebookContent1}, + {"jupyter.ipynb", pythonJupyterNotebookContent1}, {"pretender", "not a notebook"}, {"dir/file.txt", "file content"}, {"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"}, @@ -756,7 +763,7 @@ func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) { assert.ErrorIs(t, err, fs.ErrNotExist) // Case 2: Jupyter Notebook - err = wf.Write(ctx, "bar.ipynb", strings.NewReader(jupyterNotebookContent1)) + err = wf.Write(ctx, "bar.ipynb", strings.NewReader(pythonJupyterNotebookContent1)) require.NoError(t, err) // The Jupyter notebook should exist but not the source notebook diff --git a/internal/helpers.go b/internal/helpers.go index 9387706b..e99c951d 100644 --- a/internal/helpers.go +++ b/internal/helpers.go @@ -20,6 +20,7 @@ import ( "time" "github.com/databricks/cli/cmd/root" + "github.com/databricks/cli/internal/acc" "github.com/databricks/cli/libs/flags" "github.com/databricks/cli/cmd" @@ -561,12 +562,10 @@ func setupLocalFiler(t *testing.T) (filer.Filer, string) { } func setupWsfsFiler(t *testing.T) (filer.Filer, string) { - t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV")) + ctx, w := acc.WorkspaceTest(t) - ctx := context.Background() - w := databricks.Must(databricks.NewWorkspaceClient()) - tmpdir := TemporaryWorkspaceDir(t, w) - f, err := filer.NewWorkspaceFilesClient(w, tmpdir) + tmpdir := TemporaryWorkspaceDir(t, w.W) + f, err := filer.NewWorkspaceFilesClient(w.W, tmpdir) require.NoError(t, err) // Check if we can use this API here, skip test if we cannot. diff --git a/libs/filer/workspace_files_extensions_client.go b/libs/filer/workspace_files_extensions_client.go index b24ecf7e..509fe893 100644 --- a/libs/filer/workspace_files_extensions_client.go +++ b/libs/filer/workspace_files_extensions_client.go @@ -28,6 +28,9 @@ var extensionsToLanguages = map[string]workspace.Language{ ".r": workspace.LanguageR, ".scala": workspace.LanguageScala, ".sql": workspace.LanguageSql, + + // The platform supports all languages (Python, R, Scala, and SQL) for Jupyter notebooks. + // Thus, we do not need to check the language for .ipynb files. ".ipynb": workspace.LanguagePython, } @@ -47,6 +50,10 @@ func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string) return info.(wsfsFileInfo), err } +// TODO: Add end to end tests that the filer works for all .ipynb cases. +// TODO: Also fix the sync issues. OR add tests that sync works fine with non +// python notebooks. Is this needed in the first place? + // This function returns the stat for the provided notebook. The stat object itself contains the path // with the extension since it is meant to be used in the context of a fs.FileInfo. func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) { @@ -75,8 +82,9 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex return nil, nil } - // Not the correct language. Return early. - if stat.Language != extensionsToLanguages[ext] { + // Not the correct language. Return early. Note: All languages are supported + // for Jupyter notebooks. + if ext != ".ipynb" && stat.Language != extensionsToLanguages[ext] { log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), extensionsToLanguages[ext], stat.Language) return nil, nil } @@ -120,7 +128,8 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx con ext := notebook.GetExtensionByLanguage(&stat.ObjectInfo) // If the notebook was exported as a Jupyter notebook, the extension should be .ipynb. - if stat.Language == workspace.LanguagePython && stat.ReposExportFormat == workspace.ExportFormatJupyter { + // TODO: Test this. + if stat.ReposExportFormat == workspace.ExportFormatJupyter { ext = ".ipynb" } diff --git a/libs/filer/workspace_files_extensions_client_test.go b/libs/filer/workspace_files_extensions_client_test.go index 321c4371..1dd8a242 100644 --- a/libs/filer/workspace_files_extensions_client_test.go +++ b/libs/filer/workspace_files_extensions_client_test.go @@ -44,15 +44,6 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { filePath: "/dir/foo.py", expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue", }, - { - name: "python jupyter notebook and file", - language: workspace.LanguagePython, - notebookExportFormat: workspace.ExportFormatJupyter, - notebookPath: "/dir/foo", - filePath: "/dir/foo.py", - // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. - expectedError: "", - }, { name: "scala source notebook and file", language: workspace.LanguageScala, @@ -82,6 +73,66 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) { language: workspace.LanguagePython, notebookExportFormat: workspace.ExportFormatJupyter, notebookPath: "/dir/foo", + filePath: "/dir/foo.py", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", + }, + { + name: "scala jupyter notebook and file", + language: workspace.LanguageScala, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.scala", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", + }, + { + name: "sql jupyter notebook and file", + language: workspace.LanguageSql, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.sql", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", + }, + { + name: "r jupyter notebook and file", + language: workspace.LanguageR, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.sql", + // Jupyter notebooks would correspond to foo.ipynb so an error is not expected. + expectedError: "", + }, + { + name: "python jupyter notebook and file", + language: workspace.LanguagePython, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, + { + name: "scala jupyter notebook and file", + language: workspace.LanguageScala, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, + { + name: "r jupyter notebook and file", + language: workspace.LanguageR, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", + filePath: "/dir/foo.ipynb", + expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", + }, + { + name: "sql jupyter notebook and file", + language: workspace.LanguageSql, + notebookExportFormat: workspace.ExportFormatJupyter, + notebookPath: "/dir/foo", filePath: "/dir/foo.ipynb", expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue", },