package filer

import (
	"context"
	"net/http"
	"testing"

	"github.com/databricks/databricks-sdk-go/experimental/mocks"
	"github.com/databricks/databricks-sdk-go/service/workspace"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/mock"
)
// mockApiClient mocks client.DatabricksClient from the databricks-sdk-go package.
type mockApiClient struct {
	mock.Mock
}

func (m *mockApiClient) Do(ctx context.Context, method, path string,
	headers map[string]string, request any, response any,
	visitors ...func(*http.Request) error) error {
	args := m.Called(ctx, method, path, headers, request, response, visitors)

	// Set the HTTP response from the value provided in the mock call. Callers
	// are expected to pass a *wsfsFileInfo as the response argument.
	p := response.(*wsfsFileInfo)
	*p = args.Get(1).(wsfsFileInfo)

	return args.Error(0)
}
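
// In this test the filer only issues plain REST calls through Do, so mocking
// this single method is enough to stand in for the SDK's client.DatabricksClient.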

func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) {
	for _, tc := range []struct {
		name                 string
		language             workspace.Language
		notebookExportFormat workspace.ExportFormat
		notebookPath         string
		filePath             string
		expectedError        string
	}{
		{
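			// A source-format notebook is exported with its language's extension
			// (".py" here), so it collides with a sibling file of the same name.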
name : "python source notebook and file with source extension" ,
			language:             workspace.LanguagePython,
			notebookExportFormat: workspace.ExportFormatSource,
			notebookPath:         "/dir/foo",
			filePath:             "/dir/foo.py",
			expectedError:        "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue",
		},
		{
name : "scala source notebook and file with source extension" ,
			language:             workspace.LanguageScala,
			notebookExportFormat: workspace.ExportFormatSource,
			notebookPath:         "/dir/foo",
			filePath:             "/dir/foo.scala",
			expectedError:        "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.scala resolve to the same name /foo.scala. Changing the name of one of these objects will resolve this issue",
		},
		{
name : "r source notebook and file with source extension" ,
			language:             workspace.LanguageR,
			notebookExportFormat: workspace.ExportFormatSource,
			notebookPath:         "/dir/foo",
			filePath:             "/dir/foo.r",
			expectedError:        "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.r resolve to the same name /foo.r. Changing the name of one of these objects will resolve this issue",
		},
		{
name : "sql source notebook and file with source extension" ,
			language:             workspace.LanguageSql,
			notebookExportFormat: workspace.ExportFormatSource,
			notebookPath:         "/dir/foo",
			filePath:             "/dir/foo.sql",
			expectedError:        "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.sql resolve to the same name /foo.sql. Changing the name of one of these objects will resolve this issue",
		},
		{
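			// This and the next three cases use the Jupyter export format: the
			// notebook maps to foo.ipynb, so a source-extension sibling does not collide.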
name : "python jupyter notebook and file with source extension" ,
			language:             workspace.LanguagePython,
			notebookExportFormat: workspace.ExportFormatJupyter,
			notebookPath:         "/dir/foo",
filePath : "/dir/foo.py" ,
// Jupyter notebooks would correspond to foo.ipynb so an error is not expected.
expectedError : "" ,
} ,
		{
			name:                 "scala jupyter notebook and file with source extension",
			language:             workspace.LanguageScala,
			notebookExportFormat: workspace.ExportFormatJupyter,
			notebookPath:         "/dir/foo",
			filePath:             "/dir/foo.scala",
			// Jupyter notebooks would correspond to foo.ipynb, so an error is not expected.
			expectedError: "",
		},
		{
			name:                 "sql jupyter notebook and file with source extension",
			language:             workspace.LanguageSql,
			notebookExportFormat: workspace.ExportFormatJupyter,
			notebookPath:         "/dir/foo",
			filePath:             "/dir/foo.sql",
			// Jupyter notebooks would correspond to foo.ipynb, so an error is not expected.
			expectedError: "",
		},
		{
			name:                 "r jupyter notebook and file with source extension",
			language:             workspace.LanguageR,
			notebookExportFormat: workspace.ExportFormatJupyter,
			notebookPath:         "/dir/foo",
			filePath:             "/dir/foo.r",
			// Jupyter notebooks would correspond to foo.ipynb, so an error is not expected.
			expectedError: "",
		},
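		// With the Jupyter export format the notebook itself resolves to foo.ipynb,
		// so only a sibling literally named foo.ipynb collides, as the cases below verify.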
		{
			name:                 "python jupyter notebook and file with .ipynb extension",
			language:             workspace.LanguagePython,
			notebookExportFormat: workspace.ExportFormatJupyter,
			notebookPath:         "/dir/foo",
			filePath:             "/dir/foo.ipynb",
			expectedError:        "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
		},
		{
			name:                 "scala jupyter notebook and file with .ipynb extension",
			language:             workspace.LanguageScala,
			notebookExportFormat: workspace.ExportFormatJupyter,
			notebookPath:         "/dir/foo",
			filePath:             "/dir/foo.ipynb",
			expectedError:        "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
		},
		{
			name:                 "r jupyter notebook and file with .ipynb extension",
			language:             workspace.LanguageR,
			notebookExportFormat: workspace.ExportFormatJupyter,
			notebookPath:         "/dir/foo",
			filePath:             "/dir/foo.ipynb",
			expectedError:        "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
		},
		{
			name:                 "sql jupyter notebook and file with .ipynb extension",
			language:             workspace.LanguageSql,
			notebookExportFormat: workspace.ExportFormatJupyter,
			notebookPath:         "/dir/foo",
filePath : "/dir/foo.ipynb" ,
expectedError : "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue" ,
} ,
} {
		t.Run(tc.name, func(t *testing.T) {
			mockedWorkspaceClient := mocks.NewMockWorkspaceClient(t)
			mockedApiClient := mockApiClient{}

			// Mock the workspace API's ListAll method.
			workspaceApi := mockedWorkspaceClient.GetMockWorkspaceAPI()
			workspaceApi.EXPECT().ListAll(mock.Anything, workspace.ListWorkspaceRequest{
				Path: "/dir",
			}).Return([]workspace.ObjectInfo{
				{
					Path:       tc.filePath,
					Language:   tc.language,
					ObjectType: workspace.ObjectTypeFile,
				},
				{
					Path:       tc.notebookPath,
					Language:   tc.language,
					ObjectType: workspace.ObjectTypeNotebook,
				},
			}, nil)

			// Mock the bespoke API calls to /api/2.0/workspace/get-status that are
			// used to figure out the right file extension for the notebook.
			statNotebook := wsfsFileInfo{
				ObjectInfo: workspace.ObjectInfo{
					Path:       tc.notebookPath,
					Language:   tc.language,
					ObjectType: workspace.ObjectTypeNotebook,
				},
				ReposExportFormat: tc.notebookExportFormat,
			}

			mockedApiClient.On("Do", mock.Anything, http.MethodGet, "/api/2.0/workspace/get-status", map[string]string(nil), map[string]string{
				"path":               tc.notebookPath,
				"return_export_info": "true",
			}, mock.AnythingOfType("*filer.wsfsFileInfo"), []func(*http.Request) error(nil)).Return(nil, statNotebook)
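
			// Note: testify feeds the Return(nil, statNotebook) values above into
			// args.Error(0) and args.Get(1) inside mockApiClient.Do, so the mocked
			// stat call succeeds and writes statNotebook into the response pointer.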

			workspaceFilesClient := WorkspaceFilesClient{
				workspaceClient: mockedWorkspaceClient.WorkspaceClient,
				apiClient:       &mockedApiClient,
				root:            NewWorkspaceRootPath("/dir"),
			}

			workspaceFilesExtensionsClient := workspaceFilesExtensionsClient{
				workspaceClient: mockedWorkspaceClient.WorkspaceClient,
				wsfs:            &workspaceFilesClient,
			}

			_, err := workspaceFilesExtensionsClient.ReadDir(context.Background(), "/")

			if tc.expectedError == "" {
				assert.NoError(t, err)
			} else {
				assert.ErrorAs(t, err, &duplicatePathError{})
				assert.EqualError(t, err, tc.expectedError)
			}

			// Assert that the mocked methods were actually called, as a sanity check.
			workspaceApi.AssertNumberOfCalls(t, "ListAll", 1)
			mockedApiClient.AssertNumberOfCalls(t, "Do", 1)
		})
	}
}