2022-12-14 14:37:14 +00:00
|
|
|
package internal
|
|
|
|
|
|
|
|
import (
|
2023-05-31 11:24:20 +00:00
|
|
|
"bytes"
|
2022-12-14 14:37:14 +00:00
|
|
|
"context"
|
2024-05-30 11:59:27 +00:00
|
|
|
"encoding/json"
|
2022-12-14 14:37:14 +00:00
|
|
|
"errors"
|
|
|
|
"io"
|
2023-05-31 12:22:26 +00:00
|
|
|
"io/fs"
|
2024-05-30 11:59:27 +00:00
|
|
|
"path"
|
2023-06-12 19:03:46 +00:00
|
|
|
"regexp"
|
2022-12-14 14:37:14 +00:00
|
|
|
"strings"
|
|
|
|
"testing"
|
|
|
|
|
2023-05-16 16:35:39 +00:00
|
|
|
"github.com/databricks/cli/libs/filer"
|
2022-12-14 14:37:14 +00:00
|
|
|
"github.com/stretchr/testify/assert"
|
|
|
|
"github.com/stretchr/testify/require"
|
|
|
|
)
|
|
|
|
|
|
|
|
type filerTest struct {
|
|
|
|
*testing.T
|
|
|
|
filer.Filer
|
|
|
|
}
|
|
|
|
|
|
|
|
func (f filerTest) assertContents(ctx context.Context, name string, contents string) {
|
|
|
|
reader, err := f.Read(ctx, name)
|
|
|
|
if !assert.NoError(f, err) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-06-12 13:53:58 +00:00
|
|
|
defer reader.Close()
|
|
|
|
|
2023-05-31 11:24:20 +00:00
|
|
|
var body bytes.Buffer
|
|
|
|
_, err = io.Copy(&body, reader)
|
2022-12-14 14:37:14 +00:00
|
|
|
if !assert.NoError(f, err) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-05-31 11:24:20 +00:00
|
|
|
assert.Equal(f, contents, body.String())
|
2022-12-14 14:37:14 +00:00
|
|
|
}
|
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
func (f filerTest) assertContentsJupyter(ctx context.Context, name string, language string) {
|
2024-05-30 11:59:27 +00:00
|
|
|
reader, err := f.Read(ctx, name)
|
|
|
|
if !assert.NoError(f, err) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
defer reader.Close()
|
|
|
|
|
|
|
|
var body bytes.Buffer
|
|
|
|
_, err = io.Copy(&body, reader)
|
|
|
|
if !assert.NoError(f, err) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
var actual map[string]any
|
|
|
|
err = json.Unmarshal(body.Bytes(), &actual)
|
|
|
|
if !assert.NoError(f, err) {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Since a roundtrip to the workspace changes a Jupyter notebook's payload,
|
|
|
|
// the best we can do is assert that the nbformat is correct.
|
|
|
|
assert.EqualValues(f, 4, actual["nbformat"])
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
assert.Equal(f, language, actual["metadata"].(map[string]any)["language_info"].(map[string]any)["name"])
|
2024-05-30 11:59:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (f filerTest) assertNotExists(ctx context.Context, name string) {
|
|
|
|
_, err := f.Stat(ctx, name)
|
|
|
|
assert.ErrorIs(f, err, fs.ErrNotExist)
|
|
|
|
}
|
|
|
|
|
2024-02-20 16:14:37 +00:00
|
|
|
func commonFilerRecursiveDeleteTest(t *testing.T, ctx context.Context, f filer.Filer) {
|
2023-05-31 09:11:17 +00:00
|
|
|
var err error
|
|
|
|
|
2024-02-20 16:14:37 +00:00
|
|
|
err = f.Write(ctx, "dir/file1", strings.NewReader("content1"), filer.CreateParentDirectories)
|
|
|
|
require.NoError(t, err)
|
|
|
|
filerTest{t, f}.assertContents(ctx, "dir/file1", `content1`)
|
|
|
|
|
|
|
|
err = f.Write(ctx, "dir/file2", strings.NewReader("content2"), filer.CreateParentDirectories)
|
|
|
|
require.NoError(t, err)
|
|
|
|
filerTest{t, f}.assertContents(ctx, "dir/file2", `content2`)
|
|
|
|
|
|
|
|
err = f.Write(ctx, "dir/subdir1/file3", strings.NewReader("content3"), filer.CreateParentDirectories)
|
|
|
|
require.NoError(t, err)
|
|
|
|
filerTest{t, f}.assertContents(ctx, "dir/subdir1/file3", `content3`)
|
|
|
|
|
|
|
|
err = f.Write(ctx, "dir/subdir1/file4", strings.NewReader("content4"), filer.CreateParentDirectories)
|
|
|
|
require.NoError(t, err)
|
|
|
|
filerTest{t, f}.assertContents(ctx, "dir/subdir1/file4", `content4`)
|
|
|
|
|
|
|
|
err = f.Write(ctx, "dir/subdir2/file5", strings.NewReader("content5"), filer.CreateParentDirectories)
|
|
|
|
require.NoError(t, err)
|
|
|
|
filerTest{t, f}.assertContents(ctx, "dir/subdir2/file5", `content5`)
|
|
|
|
|
|
|
|
err = f.Write(ctx, "dir/subdir2/file6", strings.NewReader("content6"), filer.CreateParentDirectories)
|
|
|
|
require.NoError(t, err)
|
|
|
|
filerTest{t, f}.assertContents(ctx, "dir/subdir2/file6", `content6`)
|
|
|
|
|
|
|
|
entriesBeforeDelete, err := f.ReadDir(ctx, "dir")
|
|
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, entriesBeforeDelete, 4)
|
|
|
|
|
|
|
|
names := []string{}
|
|
|
|
for _, e := range entriesBeforeDelete {
|
|
|
|
names = append(names, e.Name())
|
|
|
|
}
|
|
|
|
assert.Equal(t, names, []string{"file1", "file2", "subdir1", "subdir2"})
|
|
|
|
|
|
|
|
err = f.Delete(ctx, "dir")
|
|
|
|
assert.ErrorAs(t, err, &filer.DirectoryNotEmptyError{})
|
|
|
|
|
|
|
|
err = f.Delete(ctx, "dir", filer.DeleteRecursively)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
_, err = f.ReadDir(ctx, "dir")
|
|
|
|
assert.ErrorAs(t, err, &filer.NoSuchDirectoryError{})
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestAccFilerRecursiveDelete(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
for _, testCase := range []struct {
|
|
|
|
name string
|
|
|
|
f func(t *testing.T) (filer.Filer, string)
|
|
|
|
}{
|
|
|
|
{"local", setupLocalFiler},
|
|
|
|
{"workspace files", setupWsfsFiler},
|
|
|
|
{"dbfs", setupDbfsFiler},
|
|
|
|
{"files", setupUcVolumesFiler},
|
2024-05-30 11:59:27 +00:00
|
|
|
{"workspace files extensions", setupWsfsExtensionsFiler},
|
2024-02-20 16:14:37 +00:00
|
|
|
} {
|
|
|
|
tc := testCase
|
|
|
|
|
|
|
|
t.Run(testCase.name, func(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
f, _ := tc.f(t)
|
|
|
|
ctx := context.Background()
|
|
|
|
|
|
|
|
// Common tests we run across all filers to ensure consistent behavior.
|
|
|
|
commonFilerRecursiveDeleteTest(t, ctx, f)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Common tests we run across all filers to ensure consistent behavior.
|
|
|
|
func commonFilerReadWriteTests(t *testing.T, ctx context.Context, f filer.Filer) {
|
|
|
|
var err error
|
|
|
|
|
|
|
|
// Write should fail because the intermediate directory doesn't exist.
|
2023-01-05 11:03:31 +00:00
|
|
|
err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello world`))
|
2022-12-14 14:37:14 +00:00
|
|
|
assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{}))
|
2023-05-31 18:47:00 +00:00
|
|
|
assert.True(t, errors.Is(err, fs.ErrNotExist))
|
2022-12-14 14:37:14 +00:00
|
|
|
|
2024-02-20 16:14:37 +00:00
|
|
|
// Read should fail because the intermediate directory doesn't yet exist.
|
2022-12-14 14:37:14 +00:00
|
|
|
_, err = f.Read(ctx, "/foo/bar")
|
2023-05-31 11:24:20 +00:00
|
|
|
assert.True(t, errors.As(err, &filer.FileDoesNotExistError{}))
|
2023-05-31 18:47:00 +00:00
|
|
|
assert.True(t, errors.Is(err, fs.ErrNotExist))
|
2022-12-14 14:37:14 +00:00
|
|
|
|
2023-06-08 16:15:12 +00:00
|
|
|
// Read should fail because the path points to a directory
|
|
|
|
err = f.Mkdir(ctx, "/dir")
|
|
|
|
require.NoError(t, err)
|
|
|
|
_, err = f.Read(ctx, "/dir")
|
|
|
|
assert.ErrorIs(t, err, fs.ErrInvalid)
|
|
|
|
|
2022-12-14 14:37:14 +00:00
|
|
|
// Write with CreateParentDirectories flag should succeed.
|
2023-01-05 11:03:31 +00:00
|
|
|
err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello world`), filer.CreateParentDirectories)
|
2022-12-14 14:37:14 +00:00
|
|
|
assert.NoError(t, err)
|
2023-01-05 11:03:31 +00:00
|
|
|
filerTest{t, f}.assertContents(ctx, "/foo/bar", `hello world`)
|
2022-12-14 14:37:14 +00:00
|
|
|
|
|
|
|
// Write should fail because there is an existing file at the specified path.
|
2023-01-05 11:03:31 +00:00
|
|
|
err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello universe`))
|
2022-12-14 14:37:14 +00:00
|
|
|
assert.True(t, errors.As(err, &filer.FileAlreadyExistsError{}))
|
2023-05-31 18:47:00 +00:00
|
|
|
assert.True(t, errors.Is(err, fs.ErrExist))
|
2022-12-14 14:37:14 +00:00
|
|
|
|
|
|
|
// Write with OverwriteIfExists should succeed.
|
2023-01-05 11:03:31 +00:00
|
|
|
err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello universe`), filer.OverwriteIfExists)
|
2022-12-14 14:37:14 +00:00
|
|
|
assert.NoError(t, err)
|
2023-01-05 11:03:31 +00:00
|
|
|
filerTest{t, f}.assertContents(ctx, "/foo/bar", `hello universe`)
|
2022-12-14 14:37:14 +00:00
|
|
|
|
2023-06-06 06:27:47 +00:00
|
|
|
// Write should succeed if there is no existing file at the specified path.
|
|
|
|
err = f.Write(ctx, "/foo/qux", strings.NewReader(`hello universe`))
|
|
|
|
assert.NoError(t, err)
|
|
|
|
|
2023-06-01 18:23:22 +00:00
|
|
|
// Stat on a directory should succeed.
|
2023-06-19 18:29:13 +00:00
|
|
|
// Note: size and modification time behave differently between backends.
|
2023-06-01 18:23:22 +00:00
|
|
|
info, err := f.Stat(ctx, "/foo")
|
|
|
|
require.NoError(t, err)
|
|
|
|
assert.Equal(t, "foo", info.Name())
|
|
|
|
assert.True(t, info.Mode().IsDir())
|
|
|
|
assert.Equal(t, true, info.IsDir())
|
|
|
|
|
|
|
|
// Stat on a file should succeed.
|
2023-06-19 18:29:13 +00:00
|
|
|
// Note: size and modification time behave differently between backends.
|
2023-06-01 18:23:22 +00:00
|
|
|
info, err = f.Stat(ctx, "/foo/bar")
|
|
|
|
require.NoError(t, err)
|
|
|
|
assert.Equal(t, "bar", info.Name())
|
|
|
|
assert.True(t, info.Mode().IsRegular())
|
|
|
|
assert.Equal(t, false, info.IsDir())
|
|
|
|
|
2022-12-14 14:37:14 +00:00
|
|
|
// Delete should fail if the file doesn't exist.
|
|
|
|
err = f.Delete(ctx, "/doesnt_exist")
|
2024-02-20 16:14:37 +00:00
|
|
|
assert.ErrorAs(t, err, &filer.FileDoesNotExistError{})
|
2023-05-31 18:47:00 +00:00
|
|
|
assert.True(t, errors.Is(err, fs.ErrNotExist))
|
2022-12-14 14:37:14 +00:00
|
|
|
|
2023-06-01 18:23:22 +00:00
|
|
|
// Stat should fail if the file doesn't exist.
|
|
|
|
_, err = f.Stat(ctx, "/doesnt_exist")
|
2024-02-20 16:14:37 +00:00
|
|
|
assert.ErrorAs(t, err, &filer.FileDoesNotExistError{})
|
2023-06-01 18:23:22 +00:00
|
|
|
assert.True(t, errors.Is(err, fs.ErrNotExist))
|
|
|
|
|
2022-12-14 14:37:14 +00:00
|
|
|
// Delete should succeed for file that does exist.
|
|
|
|
err = f.Delete(ctx, "/foo/bar")
|
|
|
|
assert.NoError(t, err)
|
2023-06-06 06:27:47 +00:00
|
|
|
|
|
|
|
// Delete should fail for a non-empty directory.
|
|
|
|
err = f.Delete(ctx, "/foo")
|
2024-02-20 16:14:37 +00:00
|
|
|
assert.ErrorAs(t, err, &filer.DirectoryNotEmptyError{})
|
2023-06-06 06:27:47 +00:00
|
|
|
assert.True(t, errors.Is(err, fs.ErrInvalid))
|
|
|
|
|
|
|
|
// Delete should succeed for a non-empty directory if the DeleteRecursively flag is set.
|
|
|
|
err = f.Delete(ctx, "/foo", filer.DeleteRecursively)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
|
|
|
|
// Delete of the filer root should ALWAYS fail, otherwise subsequent writes would fail.
|
|
|
|
// It is not in the filer's purview to delete its root directory.
|
|
|
|
err = f.Delete(ctx, "/")
|
|
|
|
assert.True(t, errors.As(err, &filer.CannotDeleteRootError{}))
|
|
|
|
assert.True(t, errors.Is(err, fs.ErrInvalid))
|
2022-12-14 14:37:14 +00:00
|
|
|
}
|
2023-05-31 09:11:17 +00:00
|
|
|
|
2024-02-20 16:14:37 +00:00
|
|
|
func TestAccFilerReadWrite(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
for _, testCase := range []struct {
|
|
|
|
name string
|
|
|
|
f func(t *testing.T) (filer.Filer, string)
|
|
|
|
}{
|
|
|
|
{"local", setupLocalFiler},
|
|
|
|
{"workspace files", setupWsfsFiler},
|
|
|
|
{"dbfs", setupDbfsFiler},
|
|
|
|
{"files", setupUcVolumesFiler},
|
2024-05-30 11:59:27 +00:00
|
|
|
{"workspace files extensions", setupWsfsExtensionsFiler},
|
2024-02-20 16:14:37 +00:00
|
|
|
} {
|
|
|
|
tc := testCase
|
|
|
|
|
|
|
|
t.Run(testCase.name, func(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
f, _ := tc.f(t)
|
|
|
|
ctx := context.Background()
|
|
|
|
|
|
|
|
// Common tests we run across all filers to ensure consistent behavior.
|
|
|
|
commonFilerReadWriteTests(t, ctx, f)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Common tests we run across all filers to ensure consistent behavior.
|
|
|
|
func commonFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) {
|
2023-05-31 09:11:17 +00:00
|
|
|
var err error
|
2023-05-31 12:22:26 +00:00
|
|
|
var info fs.FileInfo
|
2023-05-31 09:11:17 +00:00
|
|
|
|
|
|
|
// We start with an empty directory.
|
|
|
|
entries, err := f.ReadDir(ctx, ".")
|
|
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, entries, 0)
|
|
|
|
|
|
|
|
// Write a file.
|
|
|
|
err = f.Write(ctx, "/hello.txt", strings.NewReader(`hello world`))
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Create a directory.
|
|
|
|
err = f.Mkdir(ctx, "/dir")
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Write a file.
|
|
|
|
err = f.Write(ctx, "/dir/world.txt", strings.NewReader(`hello world`))
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Create a nested directory (check that it creates intermediate directories).
|
|
|
|
err = f.Mkdir(ctx, "/dir/a/b/c")
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Expect an error if the path doesn't exist.
|
|
|
|
_, err = f.ReadDir(ctx, "/dir/a/b/c/d/e")
|
2023-05-31 11:24:20 +00:00
|
|
|
assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{}), err)
|
2023-05-31 18:47:00 +00:00
|
|
|
assert.True(t, errors.Is(err, fs.ErrNotExist))
|
2023-05-31 09:11:17 +00:00
|
|
|
|
|
|
|
// Expect two entries in the root.
|
|
|
|
entries, err = f.ReadDir(ctx, ".")
|
|
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, entries, 2)
|
2023-05-31 12:22:26 +00:00
|
|
|
assert.Equal(t, "dir", entries[0].Name())
|
|
|
|
assert.True(t, entries[0].IsDir())
|
|
|
|
assert.Equal(t, "hello.txt", entries[1].Name())
|
|
|
|
assert.False(t, entries[1].IsDir())
|
|
|
|
info, err = entries[1].Info()
|
|
|
|
require.NoError(t, err)
|
|
|
|
assert.Greater(t, info.ModTime().Unix(), int64(0))
|
2023-05-31 09:11:17 +00:00
|
|
|
|
|
|
|
// Expect two entries in the directory.
|
|
|
|
entries, err = f.ReadDir(ctx, "/dir")
|
|
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, entries, 2)
|
2023-05-31 12:22:26 +00:00
|
|
|
assert.Equal(t, "a", entries[0].Name())
|
|
|
|
assert.True(t, entries[0].IsDir())
|
|
|
|
assert.Equal(t, "world.txt", entries[1].Name())
|
|
|
|
assert.False(t, entries[1].IsDir())
|
|
|
|
info, err = entries[1].Info()
|
|
|
|
require.NoError(t, err)
|
|
|
|
assert.Greater(t, info.ModTime().Unix(), int64(0))
|
2023-05-31 09:11:17 +00:00
|
|
|
|
|
|
|
// Expect a single entry in the nested path.
|
|
|
|
entries, err = f.ReadDir(ctx, "/dir/a/b")
|
|
|
|
require.NoError(t, err)
|
|
|
|
assert.Len(t, entries, 1)
|
2023-05-31 12:22:26 +00:00
|
|
|
assert.Equal(t, "c", entries[0].Name())
|
|
|
|
assert.True(t, entries[0].IsDir())
|
2023-06-02 10:28:35 +00:00
|
|
|
|
|
|
|
// Expect an error trying to call ReadDir on a file
|
|
|
|
_, err = f.ReadDir(ctx, "/hello.txt")
|
|
|
|
assert.ErrorIs(t, err, fs.ErrInvalid)
|
|
|
|
|
|
|
|
// Expect 0 entries for an empty directory
|
|
|
|
err = f.Mkdir(ctx, "empty-dir")
|
|
|
|
require.NoError(t, err)
|
|
|
|
entries, err = f.ReadDir(ctx, "empty-dir")
|
|
|
|
assert.NoError(t, err)
|
|
|
|
assert.Len(t, entries, 0)
|
|
|
|
|
|
|
|
// Expect one entry for a directory with a file in it
|
|
|
|
err = f.Write(ctx, "dir-with-one-file/my-file.txt", strings.NewReader("abc"), filer.CreateParentDirectories)
|
|
|
|
require.NoError(t, err)
|
|
|
|
entries, err = f.ReadDir(ctx, "dir-with-one-file")
|
|
|
|
assert.NoError(t, err)
|
|
|
|
assert.Len(t, entries, 1)
|
|
|
|
assert.Equal(t, entries[0].Name(), "my-file.txt")
|
|
|
|
assert.False(t, entries[0].IsDir())
|
2023-05-31 09:11:17 +00:00
|
|
|
}
|
2023-05-31 11:24:20 +00:00
|
|
|
|
2024-02-20 16:14:37 +00:00
|
|
|
func TestAccFilerReadDir(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
for _, testCase := range []struct {
|
|
|
|
name string
|
|
|
|
f func(t *testing.T) (filer.Filer, string)
|
|
|
|
}{
|
|
|
|
{"local", setupLocalFiler},
|
|
|
|
{"workspace files", setupWsfsFiler},
|
|
|
|
{"dbfs", setupDbfsFiler},
|
|
|
|
{"files", setupUcVolumesFiler},
|
2024-05-30 11:59:27 +00:00
|
|
|
{"workspace files extensions", setupWsfsExtensionsFiler},
|
2024-02-20 16:14:37 +00:00
|
|
|
} {
|
|
|
|
tc := testCase
|
|
|
|
|
|
|
|
t.Run(testCase.name, func(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
f, _ := tc.f(t)
|
|
|
|
ctx := context.Background()
|
|
|
|
|
|
|
|
commonFilerReadDirTest(t, ctx, f)
|
|
|
|
})
|
2023-05-31 11:24:20 +00:00
|
|
|
}
|
|
|
|
}
|
2023-06-12 13:53:58 +00:00
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
func TestAccFilerWorkspaceNotebook(t *testing.T) {
|
2024-05-30 11:59:27 +00:00
|
|
|
t.Parallel()
|
|
|
|
|
2024-02-20 16:14:37 +00:00
|
|
|
ctx := context.Background()
|
2023-06-12 19:03:46 +00:00
|
|
|
var err error
|
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
tcases := []struct {
|
|
|
|
name string
|
|
|
|
nameWithoutExt string
|
|
|
|
content1 string
|
|
|
|
expected1 string
|
|
|
|
content2 string
|
|
|
|
expected2 string
|
|
|
|
}{
|
|
|
|
{
|
|
|
|
name: "pyNb.py",
|
|
|
|
nameWithoutExt: "pyNb",
|
|
|
|
content1: "# Databricks notebook source\nprint('first upload')",
|
|
|
|
expected1: "# Databricks notebook source\nprint('first upload')",
|
|
|
|
content2: "# Databricks notebook source\nprint('second upload')",
|
|
|
|
expected2: "# Databricks notebook source\nprint('second upload')",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
name: "rNb.r",
|
|
|
|
nameWithoutExt: "rNb",
|
|
|
|
content1: "# Databricks notebook source\nprint('first upload')",
|
|
|
|
expected1: "# Databricks notebook source\nprint('first upload')",
|
|
|
|
content2: "# Databricks notebook source\nprint('second upload')",
|
|
|
|
expected2: "# Databricks notebook source\nprint('second upload')",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
name: "sqlNb.sql",
|
|
|
|
nameWithoutExt: "sqlNb",
|
|
|
|
content1: "-- Databricks notebook source\n SELECT \"first upload\"",
|
|
|
|
expected1: "-- Databricks notebook source\n SELECT \"first upload\"",
|
|
|
|
content2: "-- Databricks notebook source\n SELECT \"second upload\"",
|
|
|
|
expected2: "-- Databricks notebook source\n SELECT \"second upload\"",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
name: "scalaNb.scala",
|
|
|
|
nameWithoutExt: "scalaNb",
|
|
|
|
content1: "// Databricks notebook source\n println(\"first upload\")",
|
|
|
|
expected1: "// Databricks notebook source\n println(\"first upload\")",
|
|
|
|
content2: "// Databricks notebook source\n println(\"second upload\")",
|
|
|
|
expected2: "// Databricks notebook source\n println(\"second upload\")",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
name: "pythonJupyterNb.ipynb",
|
|
|
|
nameWithoutExt: "pythonJupyterNb",
|
|
|
|
content1: readFile(t, "testdata/notebooks/py1.ipynb"),
|
|
|
|
expected1: "# Databricks notebook source\nprint(1)",
|
|
|
|
content2: readFile(t, "testdata/notebooks/py2.ipynb"),
|
|
|
|
expected2: "# Databricks notebook source\nprint(2)",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
name: "rJupyterNb.ipynb",
|
|
|
|
nameWithoutExt: "rJupyterNb",
|
|
|
|
content1: readFile(t, "testdata/notebooks/r1.ipynb"),
|
|
|
|
expected1: "# Databricks notebook source\nprint(1)",
|
|
|
|
content2: readFile(t, "testdata/notebooks/r2.ipynb"),
|
|
|
|
expected2: "# Databricks notebook source\nprint(2)",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
name: "scalaJupyterNb.ipynb",
|
|
|
|
nameWithoutExt: "scalaJupyterNb",
|
|
|
|
content1: readFile(t, "testdata/notebooks/scala1.ipynb"),
|
|
|
|
expected1: "// Databricks notebook source\nprintln(1)",
|
|
|
|
content2: readFile(t, "testdata/notebooks/scala2.ipynb"),
|
|
|
|
expected2: "// Databricks notebook source\nprintln(2)",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
name: "sqlJupyterNotebook.ipynb",
|
|
|
|
nameWithoutExt: "sqlJupyterNotebook",
|
|
|
|
content1: readFile(t, "testdata/notebooks/sql1.ipynb"),
|
|
|
|
expected1: "-- Databricks notebook source\nselect 1",
|
|
|
|
content2: readFile(t, "testdata/notebooks/sql2.ipynb"),
|
|
|
|
expected2: "-- Databricks notebook source\nselect 2",
|
|
|
|
},
|
|
|
|
}
|
2023-06-12 19:03:46 +00:00
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deployments from local machines would work fine for non-Python
notebooks, but DABs deployments from DBR would not.
### After
DABs deployments from both local machines and DBR will work fine.
## Testing
I created the `.ipynb` notebook fixtures used in the integration
tests directly from the VSCode UI. This ensures high fidelity with
how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode, but for
R and Scala notebooks it required installing the Jupyter kernels for R
and Scala on my local machine and using them from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
for _, tc := range tcases {
|
|
|
|
f, _ := setupWsfsFiler(t)
|
|
|
|
t.Run(tc.name, func(t *testing.T) {
|
|
|
|
t.Parallel()
|
2023-06-12 19:03:46 +00:00
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
// Upload the notebook
|
|
|
|
err = f.Write(ctx, tc.name, strings.NewReader(tc.content1))
|
|
|
|
require.NoError(t, err)
|
2024-05-30 11:59:27 +00:00
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
// Assert contents after initial upload. Note that we expect the content
|
|
|
|
// for jupyter notebooks to be of type source because the workspace files
|
|
|
|
// client always uses the source format to read notebooks from the workspace.
|
|
|
|
filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected1)
|
2023-06-12 19:03:46 +00:00
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
// Assert uploading a second time fails due to overwrite mode missing
|
|
|
|
err = f.Write(ctx, tc.name, strings.NewReader(tc.content2))
|
|
|
|
assert.ErrorIs(t, err, fs.ErrExist)
|
|
|
|
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/`+tc.nameWithoutExt+`$`), err.Error())
|
2023-06-12 19:03:46 +00:00
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
// Try uploading the notebook again with overwrite flag. This time it should succeed.
|
|
|
|
err = f.Write(ctx, tc.name, strings.NewReader(tc.content2), filer.OverwriteIfExists)
|
|
|
|
require.NoError(t, err)
|
2023-06-12 19:03:46 +00:00
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
// Assert contents after second upload
|
|
|
|
filerTest{t, f}.assertContents(ctx, tc.nameWithoutExt, tc.expected2)
|
|
|
|
})
|
|
|
|
}
|
2023-06-12 19:03:46 +00:00
|
|
|
|
|
|
|
}
|
2024-05-30 11:59:27 +00:00
|
|
|
|
|
|
|
func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
files := []struct {
|
|
|
|
name string
|
|
|
|
content string
|
|
|
|
}{
|
|
|
|
{"dir1/dir2/dir3/file.txt", "file content"},
|
2024-06-04 09:53:14 +00:00
|
|
|
{"dir1/notebook.py", "# Databricks notebook source\nprint('first upload'))"},
|
2024-05-30 11:59:27 +00:00
|
|
|
{"foo.py", "print('foo')"},
|
|
|
|
{"foo.r", "print('foo')"},
|
|
|
|
{"foo.scala", "println('foo')"},
|
|
|
|
{"foo.sql", "SELECT 'foo'"},
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
{"py1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")},
|
2024-05-30 11:59:27 +00:00
|
|
|
{"pyNb.py", "# Databricks notebook source\nprint('first upload'))"},
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
{"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")},
|
2024-05-30 11:59:27 +00:00
|
|
|
{"rNb.r", "# Databricks notebook source\nprint('first upload'))"},
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
{"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")},
|
2024-05-30 11:59:27 +00:00
|
|
|
{"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"},
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
{"sql1.ipynb", readFile(t, "testdata/notebooks/sql1.ipynb")},
|
2024-05-30 11:59:27 +00:00
|
|
|
{"sqlNb.sql", "-- Databricks notebook source\n SELECT \"first upload\""},
|
|
|
|
}
|
|
|
|
|
2024-06-04 09:53:14 +00:00
|
|
|
// Assert that every file has a unique basename
|
|
|
|
basenames := map[string]struct{}{}
|
|
|
|
for _, f := range files {
|
|
|
|
basename := path.Base(f.name)
|
|
|
|
if _, ok := basenames[basename]; ok {
|
|
|
|
t.Fatalf("basename %s is not unique", basename)
|
|
|
|
}
|
|
|
|
basenames[basename] = struct{}{}
|
|
|
|
}
|
|
|
|
|
2024-05-30 11:59:27 +00:00
|
|
|
ctx := context.Background()
|
|
|
|
wf, _ := setupWsfsExtensionsFiler(t)
|
|
|
|
|
|
|
|
for _, f := range files {
|
|
|
|
err := wf.Write(ctx, f.name, strings.NewReader(f.content), filer.CreateParentDirectories)
|
|
|
|
require.NoError(t, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read entries
|
|
|
|
entries, err := wf.ReadDir(ctx, ".")
|
|
|
|
require.NoError(t, err)
|
|
|
|
names := []string{}
|
|
|
|
for _, e := range entries {
|
|
|
|
names = append(names, e.Name())
|
|
|
|
}
|
|
|
|
assert.Equal(t, []string{
|
|
|
|
"dir1",
|
|
|
|
"foo.py",
|
|
|
|
"foo.r",
|
|
|
|
"foo.scala",
|
|
|
|
"foo.sql",
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR, DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. Non-Python notebooks for
DABs deployments from local machines already work now that the
platform-side changes to the API have landed; this PR just adds
integration tests for that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
"py1.ipynb",
|
2024-05-30 11:59:27 +00:00
|
|
|
"pyNb.py",
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR, DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. Non-Python notebooks for
DABs deployments from local machines already work now that the
platform-side changes to the API have landed; this PR just adds
integration tests for that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
"r1.ipynb",
|
2024-05-30 11:59:27 +00:00
|
|
|
"rNb.r",
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR, DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. Non-Python notebooks for
DABs deployments from local machines already work now that the
platform-side changes to the API have landed; this PR just adds
integration tests for that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
"scala1.ipynb",
|
2024-05-30 11:59:27 +00:00
|
|
|
"scalaNb.scala",
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR, DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. Non-Python notebooks for
DABs deployments from local machines already work now that the
platform-side changes to the API have landed; this PR just adds
integration tests for that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
"sql1.ipynb",
|
2024-05-30 11:59:27 +00:00
|
|
|
"sqlNb.sql",
|
|
|
|
}, names)
|
2024-06-04 09:53:14 +00:00
|
|
|
|
|
|
|
// Read entries in subdirectory
|
|
|
|
entries, err = wf.ReadDir(ctx, "dir1")
|
|
|
|
require.NoError(t, err)
|
|
|
|
names = []string{}
|
|
|
|
for _, e := range entries {
|
|
|
|
names = append(names, e.Name())
|
|
|
|
}
|
|
|
|
assert.Equal(t, []string{
|
|
|
|
"dir2",
|
|
|
|
"notebook.py",
|
|
|
|
}, names)
|
2024-05-30 11:59:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func setupFilerWithExtensionsTest(t *testing.T) filer.Filer {
|
|
|
|
files := []struct {
|
|
|
|
name string
|
|
|
|
content string
|
|
|
|
}{
|
|
|
|
{"foo.py", "# Databricks notebook source\nprint('first upload'))"},
|
|
|
|
{"bar.py", "print('foo')"},
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
{"p1.ipynb", readFile(t, "testdata/notebooks/py1.ipynb")},
|
|
|
|
{"r1.ipynb", readFile(t, "testdata/notebooks/r1.ipynb")},
|
|
|
|
{"scala1.ipynb", readFile(t, "testdata/notebooks/scala1.ipynb")},
|
|
|
|
{"sql1.ipynb", readFile(t, "testdata/notebooks/sql1.ipynb")},
|
2024-05-30 11:59:27 +00:00
|
|
|
{"pretender", "not a notebook"},
|
|
|
|
{"dir/file.txt", "file content"},
|
|
|
|
{"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"},
|
|
|
|
}
|
|
|
|
|
|
|
|
ctx := context.Background()
|
|
|
|
wf, _ := setupWsfsExtensionsFiler(t)
|
|
|
|
|
|
|
|
for _, f := range files {
|
|
|
|
err := wf.Write(ctx, f.name, strings.NewReader(f.content), filer.CreateParentDirectories)
|
|
|
|
require.NoError(t, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return wf
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestAccFilerWorkspaceFilesExtensionsRead(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
ctx := context.Background()
|
|
|
|
wf := setupFilerWithExtensionsTest(t)
|
|
|
|
|
|
|
|
// Read contents of test fixtures as a sanity check.
|
|
|
|
filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('first upload'))")
|
|
|
|
filerTest{t, wf}.assertContents(ctx, "bar.py", "print('foo')")
|
|
|
|
filerTest{t, wf}.assertContents(ctx, "dir/file.txt", "file content")
|
|
|
|
filerTest{t, wf}.assertContents(ctx, "scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')")
|
|
|
|
filerTest{t, wf}.assertContents(ctx, "pretender", "not a notebook")
|
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
filerTest{t, wf}.assertContentsJupyter(ctx, "p1.ipynb", "python")
|
|
|
|
filerTest{t, wf}.assertContentsJupyter(ctx, "r1.ipynb", "r")
|
|
|
|
filerTest{t, wf}.assertContentsJupyter(ctx, "scala1.ipynb", "scala")
|
|
|
|
filerTest{t, wf}.assertContentsJupyter(ctx, "sql1.ipynb", "sql")
|
|
|
|
|
2024-05-30 11:59:27 +00:00
|
|
|
// Read non-existent file
|
|
|
|
_, err := wf.Read(ctx, "non-existent.py")
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
|
|
|
|
// Ensure we do not read a regular file as a notebook
|
|
|
|
_, err = wf.Read(ctx, "pretender.py")
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
_, err = wf.Read(ctx, "pretender.ipynb")
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
|
|
|
|
// Read directory
|
|
|
|
_, err = wf.Read(ctx, "dir")
|
|
|
|
assert.ErrorIs(t, err, fs.ErrInvalid)
|
|
|
|
|
|
|
|
// Ensure we do not read a Scala notebook as a Python notebook
|
|
|
|
_, err = wf.Read(ctx, "scala-notebook.py")
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestAccFilerWorkspaceFilesExtensionsDelete(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
ctx := context.Background()
|
|
|
|
wf := setupFilerWithExtensionsTest(t)
|
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
for _, fileName := range []string{
|
|
|
|
// notebook
|
|
|
|
"foo.py",
|
|
|
|
// file
|
|
|
|
"bar.py",
|
|
|
|
// python jupyter notebook
|
|
|
|
"p1.ipynb",
|
|
|
|
// R jupyter notebook
|
|
|
|
"r1.ipynb",
|
|
|
|
// Scala jupyter notebook
|
|
|
|
"scala1.ipynb",
|
|
|
|
// SQL jupyter notebook
|
|
|
|
"sql1.ipynb",
|
|
|
|
} {
|
|
|
|
err := wf.Delete(ctx, fileName)
|
|
|
|
require.NoError(t, err)
|
|
|
|
filerTest{t, wf}.assertNotExists(ctx, fileName)
|
|
|
|
}
|
2024-05-30 11:59:27 +00:00
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
for _, fileName := range []string{
|
|
|
|
// do not delete non-existent file
|
|
|
|
"non-existent.py",
|
|
|
|
// do not delete a file assuming it is a notebook and stripping the extension
|
|
|
|
"pretender.py",
|
|
|
|
// do not delete a Scala notebook as a Python notebook
|
|
|
|
"scala-notebook.py",
|
|
|
|
// do not delete a file assuming it is a Jupyter notebook and stripping the extension
|
|
|
|
"pretender.ipynb",
|
|
|
|
} {
|
|
|
|
err := wf.Delete(ctx, fileName)
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
}
|
2024-05-30 11:59:27 +00:00
|
|
|
|
|
|
|
// Delete directory
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
err := wf.Delete(ctx, "dir")
|
2024-05-30 11:59:27 +00:00
|
|
|
assert.ErrorIs(t, err, fs.ErrInvalid)
|
|
|
|
|
|
|
|
// Delete directory recursively
|
|
|
|
err = wf.Delete(ctx, "dir", filer.DeleteRecursively)
|
|
|
|
require.NoError(t, err)
|
|
|
|
filerTest{t, wf}.assertNotExists(ctx, "dir")
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestAccFilerWorkspaceFilesExtensionsStat(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
ctx := context.Background()
|
|
|
|
wf := setupFilerWithExtensionsTest(t)
|
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
for _, fileName := range []string{
|
|
|
|
// notebook
|
|
|
|
"foo.py",
|
|
|
|
// file
|
|
|
|
"bar.py",
|
|
|
|
// python jupyter notebook
|
|
|
|
"p1.ipynb",
|
|
|
|
// R jupyter notebook
|
|
|
|
"r1.ipynb",
|
|
|
|
// Scala jupyter notebook
|
|
|
|
"scala1.ipynb",
|
|
|
|
// SQL jupyter notebook
|
|
|
|
"sql1.ipynb",
|
|
|
|
} {
|
|
|
|
info, err := wf.Stat(ctx, fileName)
|
|
|
|
require.NoError(t, err)
|
|
|
|
assert.Equal(t, fileName, info.Name())
|
|
|
|
assert.False(t, info.IsDir())
|
|
|
|
}
|
2024-05-30 11:59:27 +00:00
|
|
|
|
|
|
|
// Stat on a directory
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
info, err := wf.Stat(ctx, "dir")
|
2024-05-30 11:59:27 +00:00
|
|
|
require.NoError(t, err)
|
|
|
|
assert.Equal(t, "dir", info.Name())
|
|
|
|
assert.True(t, info.IsDir())
|
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
for _, fileName := range []string{
|
|
|
|
// non-existent file
|
|
|
|
"non-existent.py",
|
|
|
|
// do not stat a file assuming it is a notebook and stripping the extension
|
|
|
|
"pretender.py",
|
|
|
|
// do not stat a Scala notebook as a Python notebook
|
|
|
|
"scala-notebook.py",
|
|
|
|
// do not read a regular file assuming it is a Jupyter notebook and stripping the extension
|
|
|
|
"pretender.ipynb",
|
|
|
|
} {
|
|
|
|
_, err := wf.Stat(ctx, fileName)
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
}
|
2024-05-30 11:59:27 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
ctx := context.Background()
|
|
|
|
wf, _ := setupWsfsExtensionsFiler(t)
|
|
|
|
|
|
|
|
// Create a directory with an extension
|
|
|
|
err := wf.Mkdir(ctx, "foo")
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Reading foo.py should fail. foo is a directory, not a notebook.
|
|
|
|
_, err = wf.Read(ctx, "foo.py")
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
}
|
|
|
|
|
2024-11-18 17:25:24 +00:00
|
|
|
func TestAccWorkspaceFilesExtensionsNotebooksAreNotReadAsFiles(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
ctx := context.Background()
|
|
|
|
wf, _ := setupWsfsExtensionsFiler(t)
|
|
|
|
|
|
|
|
// Create a notebook
|
|
|
|
err := wf.Write(ctx, "foo.ipynb", strings.NewReader(readFile(t, "testdata/notebooks/py1.ipynb")))
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Reading foo should fail. Even though the WSFS name for the notebook is foo
|
|
|
|
// reading the notebook should only work with the .ipynb extension.
|
|
|
|
_, err = wf.Read(ctx, "foo")
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
|
|
|
|
_, err = wf.Read(ctx, "foo.ipynb")
|
|
|
|
assert.NoError(t, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestAccWorkspaceFilesExtensionsNotebooksAreNotStatAsFiles(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
ctx := context.Background()
|
|
|
|
wf, _ := setupWsfsExtensionsFiler(t)
|
|
|
|
|
|
|
|
// Create a notebook
|
|
|
|
err := wf.Write(ctx, "foo.ipynb", strings.NewReader(readFile(t, "testdata/notebooks/py1.ipynb")))
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Stating foo should fail. Even though the WSFS name for the notebook is foo
|
|
|
|
// stating the notebook should only work with the .ipynb extension.
|
|
|
|
_, err = wf.Stat(ctx, "foo")
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
|
|
|
|
_, err = wf.Stat(ctx, "foo.ipynb")
|
|
|
|
assert.NoError(t, err)
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestAccWorkspaceFilesExtensionsNotebooksAreNotDeletedAsFiles(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
|
|
|
ctx := context.Background()
|
|
|
|
wf, _ := setupWsfsExtensionsFiler(t)
|
|
|
|
|
|
|
|
// Create a notebook
|
|
|
|
err := wf.Write(ctx, "foo.ipynb", strings.NewReader(readFile(t, "testdata/notebooks/py1.ipynb")))
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Deleting foo should fail. Even though the WSFS name for the notebook is foo
|
|
|
|
// deleting the notebook should only work with the .ipynb extension.
|
|
|
|
err = wf.Delete(ctx, "foo")
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
|
|
|
|
err = wf.Delete(ctx, "foo.ipynb")
|
|
|
|
assert.NoError(t, err)
|
|
|
|
}
|
|
|
|
|
2024-05-30 11:59:27 +00:00
|
|
|
func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) {
|
|
|
|
t.Parallel()
|
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
// Case 1: Writing source notebooks.
|
|
|
|
for _, tc := range []struct {
|
|
|
|
language string
|
|
|
|
sourceName string
|
|
|
|
sourceContent string
|
|
|
|
jupyterName string
|
|
|
|
jupyterContent string
|
|
|
|
}{
|
|
|
|
{
|
|
|
|
language: "python",
|
|
|
|
sourceName: "foo.py",
|
|
|
|
sourceContent: "# Databricks notebook source\nprint('foo')",
|
|
|
|
jupyterName: "foo.ipynb",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
language: "r",
|
|
|
|
sourceName: "foo.r",
|
|
|
|
sourceContent: "# Databricks notebook source\nprint('foo')",
|
|
|
|
jupyterName: "foo.ipynb",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
language: "scala",
|
|
|
|
sourceName: "foo.scala",
|
|
|
|
sourceContent: "// Databricks notebook source\nprintln('foo')",
|
|
|
|
jupyterName: "foo.ipynb",
|
|
|
|
},
|
|
|
|
{
|
|
|
|
language: "sql",
|
|
|
|
sourceName: "foo.sql",
|
|
|
|
sourceContent: "-- Databricks notebook source\nselect 'foo'",
|
|
|
|
jupyterName: "foo.ipynb",
|
|
|
|
},
|
|
|
|
} {
|
|
|
|
t.Run("source_"+tc.language, func(t *testing.T) {
|
|
|
|
t.Parallel()
|
2024-05-30 11:59:27 +00:00
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
ctx := context.Background()
|
|
|
|
wf, _ := setupWsfsExtensionsFiler(t)
|
|
|
|
|
|
|
|
err := wf.Write(ctx, tc.sourceName, strings.NewReader(tc.sourceContent))
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Assert on the content of the source notebook that's been written.
|
|
|
|
filerTest{t, wf}.assertContents(ctx, tc.sourceName, tc.sourceContent)
|
|
|
|
|
|
|
|
// Ensure that the source notebook is not read when the name contains
|
|
|
|
// the .ipynb extension.
|
|
|
|
_, err = wf.Stat(ctx, tc.jupyterName)
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
_, err = wf.Read(ctx, tc.jupyterName)
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
err = wf.Delete(ctx, tc.jupyterName)
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
})
|
|
|
|
}
|
2024-05-30 11:59:27 +00:00
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
// Case 2: Writing Jupyter notebooks.
|
|
|
|
for _, tc := range []struct {
|
|
|
|
language string
|
|
|
|
sourceName string
|
|
|
|
jupyterName string
|
|
|
|
jupyterContent string
|
|
|
|
}{
|
|
|
|
{
|
|
|
|
language: "python",
|
|
|
|
sourceName: "foo.py",
|
|
|
|
jupyterName: "foo.ipynb",
|
|
|
|
jupyterContent: readFile(t, "testdata/notebooks/py1.ipynb"),
|
|
|
|
},
|
|
|
|
{
|
|
|
|
language: "r",
|
|
|
|
sourceName: "foo.r",
|
|
|
|
jupyterName: "foo.ipynb",
|
|
|
|
jupyterContent: readFile(t, "testdata/notebooks/r1.ipynb"),
|
|
|
|
},
|
|
|
|
{
|
|
|
|
language: "scala",
|
|
|
|
sourceName: "foo.scala",
|
|
|
|
jupyterName: "foo.ipynb",
|
|
|
|
jupyterContent: readFile(t, "testdata/notebooks/scala1.ipynb"),
|
|
|
|
},
|
|
|
|
{
|
|
|
|
language: "sql",
|
|
|
|
sourceName: "foo.sql",
|
|
|
|
jupyterName: "foo.ipynb",
|
|
|
|
jupyterContent: readFile(t, "testdata/notebooks/sql1.ipynb"),
|
|
|
|
},
|
|
|
|
} {
|
|
|
|
t.Run("jupyter_"+tc.language, func(t *testing.T) {
|
|
|
|
t.Parallel()
|
2024-05-30 11:59:27 +00:00
|
|
|
|
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
|
|
|
ctx := context.Background()
|
|
|
|
wf, _ := setupWsfsExtensionsFiler(t)
|
|
|
|
|
|
|
|
err := wf.Write(ctx, tc.jupyterName, strings.NewReader(tc.jupyterContent))
|
|
|
|
require.NoError(t, err)
|
|
|
|
|
|
|
|
// Assert that the written notebook is jupyter and has the correct
|
|
|
|
// language_info metadata set.
|
|
|
|
filerTest{t, wf}.assertContentsJupyter(ctx, tc.jupyterName, tc.language)
|
|
|
|
|
|
|
|
// Ensure that the Jupyter notebook is not read when the name does not
|
|
|
|
// contain the .ipynb extension.
|
|
|
|
_, err = wf.Stat(ctx, tc.sourceName)
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
_, err = wf.Read(ctx, tc.sourceName)
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
err = wf.Delete(ctx, tc.sourceName)
|
|
|
|
assert.ErrorIs(t, err, fs.ErrNotExist)
|
|
|
|
})
|
|
|
|
}
|
2024-05-30 11:59:27 +00:00
|
|
|
}
|