2024-05-30 11:59:27 +00:00
package filer
import (
"context"
"errors"
"fmt"
"io"
"io/fs"
"path"
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function for non-Python
notebooks because it checked that a `.ipynb` notebook was written in Python.
This PR relaxes that constraint and adds integration tests for both the
normal workspace filer and the extension-aware filer writing and reading
non-Python `.ipynb` notebooks.
This implies that after this PR, DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. Deploying non-Python notebooks
with DABs from local machines already works since the platform-side
changes to the API landed; this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
"slices"
2024-05-30 11:59:27 +00:00
"strings"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/notebook"
"github.com/databricks/databricks-sdk-go"
"github.com/databricks/databricks-sdk-go/service/workspace"
)
2025-01-20 12:09:28 +00:00
type WorkspaceFilesExtensionsClient struct {
2024-05-30 11:59:27 +00:00
workspaceClient * databricks . WorkspaceClient
2024-07-18 14:17:42 +00:00
wsfs Filer
root string
readonly bool
2024-05-30 11:59:27 +00:00
}
type workspaceFileStatus struct {
2024-07-05 11:32:29 +00:00
wsfsFileInfo
2024-05-30 11:59:27 +00:00
// Name of the file to be used in any API calls made using the workspace files
// filer. For notebooks this path does not include the extension.
nameForWorkspaceAPI string
}
2025-01-20 12:09:28 +00:00
func ( w * WorkspaceFilesExtensionsClient ) stat ( ctx context . Context , name string ) ( wsfsFileInfo , error ) {
2024-07-05 11:32:29 +00:00
info , err := w . wsfs . Stat ( ctx , name )
2024-05-30 11:59:27 +00:00
if err != nil {
2024-07-05 11:32:29 +00:00
return wsfsFileInfo { } , err
2024-05-30 11:59:27 +00:00
}
2024-07-05 11:32:29 +00:00
return info . ( wsfsFileInfo ) , err
2024-05-30 11:59:27 +00:00
}
// This function returns the stat for the provided notebook. The stat object itself contains the path
// with the extension since it is meant to be used in the context of a fs.FileInfo.
2025-01-20 12:09:28 +00:00
func ( w * WorkspaceFilesExtensionsClient ) getNotebookStatByNameWithExt ( ctx context . Context , name string ) ( * workspaceFileStatus , error ) {
2024-05-30 11:59:27 +00:00
ext := path . Ext ( name )
nameWithoutExt := strings . TrimSuffix ( name , ext )
// File name does not have an extension associated with Databricks notebooks, return early.
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
if ! slices . Contains ( [ ] string {
notebook . ExtensionPython ,
notebook . ExtensionR ,
notebook . ExtensionScala ,
notebook . ExtensionSql ,
notebook . ExtensionJupyter ,
} , ext ) {
2024-05-30 11:59:27 +00:00
return nil , nil
}
// If the file could be a notebook, check if it is and has the correct language.
stat , err := w . stat ( ctx , nameWithoutExt )
if err != nil {
// If the file does not exist, return early.
if errors . As ( err , & FileDoesNotExistError { } ) {
return nil , nil
}
log . Debugf ( ctx , "attempting to determine if %s could be a notebook. Failed to fetch the status of object at %s: %s" , name , path . Join ( w . root , nameWithoutExt ) , err )
return nil , err
}
// Not a notebook. Return early.
if stat . ObjectType != workspace . ObjectTypeNotebook {
log . Debugf ( ctx , "attempting to determine if %s could be a notebook. Found an object at %s but it is not a notebook. It is a %s." , name , path . Join ( w . root , nameWithoutExt ) , stat . ObjectType )
return nil , nil
}
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
// Not the correct language. Return early. Note: All languages are supported
// for Jupyter notebooks.
if ext != notebook . ExtensionJupyter && stat . Language != notebook . ExtensionToLanguage [ ext ] {
log . Debugf ( ctx , "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s." , name , path . Join ( w . root , nameWithoutExt ) , notebook . ExtensionToLanguage [ ext ] , stat . Language )
2024-05-30 11:59:27 +00:00
return nil , nil
}
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
// For non-jupyter notebooks the export format should be source.
2024-05-30 11:59:27 +00:00
// If it's not, return early.
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
if ext != notebook . ExtensionJupyter && stat . ReposExportFormat != workspace . ExportFormatSource {
2024-05-30 11:59:27 +00:00
log . Debugf ( ctx , "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a source notebook. Its export format is %s." , name , path . Join ( w . root , nameWithoutExt ) , stat . ReposExportFormat )
return nil , nil
}
// When the extension is .ipynb we expect the export format to be Jupyter.
// If it's not, return early.
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
if ext == notebook . ExtensionJupyter && stat . ReposExportFormat != workspace . ExportFormatJupyter {
2024-05-30 11:59:27 +00:00
log . Debugf ( ctx , "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not exported as a Jupyter notebook. Its export format is %s." , name , path . Join ( w . root , nameWithoutExt ) , stat . ReposExportFormat )
return nil , nil
}
// Modify the stat object path to include the extension. This stat object will be used
// to return the fs.FileInfo object in the stat method.
stat . Path = stat . Path + ext
2024-07-05 11:32:29 +00:00
return & workspaceFileStatus {
wsfsFileInfo : stat ,
nameForWorkspaceAPI : nameWithoutExt ,
} , nil
2024-05-30 11:59:27 +00:00
}
2025-01-20 12:09:28 +00:00
func ( w * WorkspaceFilesExtensionsClient ) getNotebookStatByNameWithoutExt ( ctx context . Context , name string ) ( * workspaceFileStatus , error ) {
2024-05-30 11:59:27 +00:00
stat , err := w . stat ( ctx , name )
if err != nil {
return nil , err
}
// We expect this internal function to only be called from [ReadDir] when we are sure
// that the object is a notebook. Thus, this should never happen.
if stat . ObjectType != workspace . ObjectTypeNotebook {
return nil , fmt . Errorf ( "expected object at %s to be a notebook but it is a %s" , path . Join ( w . root , name ) , stat . ObjectType )
}
// Get the extension for the notebook.
2024-07-05 11:32:29 +00:00
ext := notebook . GetExtensionByLanguage ( & stat . ObjectInfo )
2024-05-30 11:59:27 +00:00
// If the notebook was exported as a Jupyter notebook, the extension should be .ipynb.
Add support for non-Python ipynb notebooks to DABs (#1827)
## Changes
### Background
The workspace import APIs recently added support for importing Jupyter
notebooks written in R, Scala, or SQL, that is non-Python notebooks.
This now works for the `/import-file` API which we leverage in the CLI.
Note: We do not need any changes in `databricks sync`. It works out of
the box because any state mapping of local names to remote names that we
store is only scoped to the notebook extension (i.e., `.ipynb` in this
case) and is agnostic of the notebook's specific language.
### Problem this PR addresses
The extension-aware filer previously did not function because it checks
that a `.ipynb` notebook is written in Python. This PR relaxes that
constraint and adds integration tests for both the normal workspace
filer and extensions aware filer writing and reading non-Python `.ipynb`
notebooks.
This implies that after this PR DABs in the workspace / CLI from DBR
will work for non-Python notebooks as well. non-Python notebooks for
DABs deployment from local machines already works after the platform
side changes to the API landed, this PR just adds integration tests for
that bit of functionality.
Note: Any platform side changes we needed for the import API have
already been rolled out to production.
### Before
DABs deploy would work fine for non-Python notebooks. But DABs
deployments from DBR would not.
### After
DABs deploys both from local machines and DBR will work fine.
## Testing
For creating the `.ipynb` notebook fixtures used in the integration
tests I created them directly from the VSCode UI. This ensures high
fidelity with how users will create their non-Python notebooks locally.
For Python notebooks this is supported out of the box by VSCode but for
R and Scala notebooks this requires installing the Jupyter kernel for R
and Scala on my local machine and using that from VSCode.
For SQL, I ended up directly modifying the `language_info` field in the
Jupyter metadata to create the test fixture.
### Discussion: Issues with configuring language at the cell level
The language metadata for a Jupyter notebook is standardized at the
notebook level (in the `language_info` field). Unfortunately, it's not
standardized at the cell level. Thus, for example, if a user changes the
language for their cell in VSCode (which is supported by the standard
Jupyter VSCode integration), it'll cause a runtime error when the user
actually attempts to run the notebook. This is because the cell-level
metadata is encoded in a format specific to VSCode:
```
cells: []{
"vscode": {
"languageId": "sql"
}
}
```
Supporting cell level languages is thus out of scope for this PR and can
be revisited along with the workspace files team if there's strong
customer interest.
2024-11-13 21:39:51 +00:00
if stat . ReposExportFormat == workspace . ExportFormatJupyter {
ext = notebook . ExtensionJupyter
2024-05-30 11:59:27 +00:00
}
// Modify the stat object path to include the extension. This stat object will be used
// to return the fs.DirEntry object in the ReadDir method.
stat . Path = stat . Path + ext
2024-07-05 11:32:29 +00:00
return & workspaceFileStatus {
wsfsFileInfo : stat ,
nameForWorkspaceAPI : name ,
} , nil
2024-05-30 11:59:27 +00:00
}
2024-08-21 07:45:25 +00:00
type duplicatePathError struct {
2024-05-30 11:59:27 +00:00
oi1 workspace . ObjectInfo
oi2 workspace . ObjectInfo
commonName string
}
2024-08-21 07:45:25 +00:00
func ( e duplicatePathError ) Error ( ) string {
2024-05-30 11:59:27 +00:00
return fmt . Sprintf ( "failed to read files from the workspace file system. Duplicate paths encountered. Both %s at %s and %s at %s resolve to the same name %s. Changing the name of one of these objects will resolve this issue" , e . oi1 . ObjectType , e . oi1 . Path , e . oi2 . ObjectType , e . oi2 . Path , e . commonName )
}
2024-07-18 14:17:42 +00:00
// ReadOnlyError is returned when a mutating operation is attempted on a filer
// that is in read-only mode.
type ReadOnlyError struct {
	// op is the name of the rejected operation, e.g. "write".
	op string
}

// Error implements the error interface.
func (e ReadOnlyError) Error() string {
	const format = "failed to %s: filer is in read-only mode"
	return fmt.Sprintf(format, e.op)
}
2024-05-30 11:59:27 +00:00
// This is a filer for the workspace file system that allows you to pretend the
// workspace file system is a traditional file system. It allows you to list, read, write,
// delete, and stat notebooks (and files in general) in the workspace, using their paths
// with the extension included.
//
2024-08-21 07:45:25 +00:00
// The ReadDir method returns a duplicatePathError if this traditional file system view is
2024-05-30 11:59:27 +00:00
// not possible. For example, a Python notebook called foo and a Python file called `foo.py`
// would resolve to the same path `foo.py` in a tradition file system.
//
// Users of this filer should be careful when using the Write and Mkdir methods.
// The underlying import API we use to upload notebooks and files returns opaque internal
// errors for namespace clashes (e.g. a file and a notebook or a directory and a notebook).
// Thus users of these methods should be careful to avoid such clashes.
func NewWorkspaceFilesExtensionsClient ( w * databricks . WorkspaceClient , root string ) ( Filer , error ) {
2024-07-18 14:17:42 +00:00
return newWorkspaceFilesExtensionsClient ( w , root , false )
}
// NewReadOnlyWorkspaceFilesExtensionsClient returns an extension-aware filer for
// the workspace file system rooted at root that is in read-only mode: its Write,
// Delete, and Mkdir methods fail with a ReadOnlyError.
func NewReadOnlyWorkspaceFilesExtensionsClient(w *databricks.WorkspaceClient, root string) (Filer, error) {
	const readonly = true
	return newWorkspaceFilesExtensionsClient(w, root, readonly)
}
func newWorkspaceFilesExtensionsClient ( w * databricks . WorkspaceClient , root string , readonly bool ) ( Filer , error ) {
2024-05-30 11:59:27 +00:00
filer , err := NewWorkspaceFilesClient ( w , root )
if err != nil {
return nil , err
}
2024-07-18 14:17:42 +00:00
if readonly {
// Wrap in a readahead cache to avoid making unnecessary calls to the workspace.
filer = newWorkspaceFilesReadaheadCache ( filer )
}
2025-01-20 12:09:28 +00:00
return & WorkspaceFilesExtensionsClient {
2024-05-30 11:59:27 +00:00
workspaceClient : w ,
2024-07-18 14:17:42 +00:00
wsfs : filer ,
root : root ,
readonly : readonly ,
2024-05-30 11:59:27 +00:00
} , nil
}
2025-01-20 12:09:28 +00:00
func ( w * WorkspaceFilesExtensionsClient ) ReadDir ( ctx context . Context , name string ) ( [ ] fs . DirEntry , error ) {
2024-05-30 11:59:27 +00:00
entries , err := w . wsfs . ReadDir ( ctx , name )
if err != nil {
return nil , err
}
seenPaths := make ( map [ string ] workspace . ObjectInfo )
for i := range entries {
info , err := entries [ i ] . Info ( )
if err != nil {
return nil , err
}
sysInfo := info . Sys ( ) . ( workspace . ObjectInfo )
// If the object is a notebook, include an extension in the entry.
if sysInfo . ObjectType == workspace . ObjectTypeNotebook {
2024-06-04 09:53:14 +00:00
stat , err := w . getNotebookStatByNameWithoutExt ( ctx , path . Join ( name , entries [ i ] . Name ( ) ) )
2024-05-30 11:59:27 +00:00
if err != nil {
return nil , err
}
// Replace the entry with the new entry that includes the extension.
2024-07-05 11:32:29 +00:00
entries [ i ] = wsfsDirEntry { wsfsFileInfo { ObjectInfo : stat . ObjectInfo } }
2024-05-30 11:59:27 +00:00
}
// Error if we have seen this path before in the current directory.
// If not seen before, add it to the seen paths.
if _ , ok := seenPaths [ entries [ i ] . Name ( ) ] ; ok {
2024-08-21 07:45:25 +00:00
return nil , duplicatePathError {
2024-05-30 11:59:27 +00:00
oi1 : seenPaths [ entries [ i ] . Name ( ) ] ,
oi2 : sysInfo ,
commonName : path . Join ( name , entries [ i ] . Name ( ) ) ,
}
}
seenPaths [ entries [ i ] . Name ( ) ] = sysInfo
}
return entries , nil
}
// Note: The import API returns opaque internal errors for namespace clashes
// (e.g. a file and a notebook or a directory and a notebook). Thus users of this
// method should be careful to avoid such clashes.
2025-01-20 12:09:28 +00:00
func ( w * WorkspaceFilesExtensionsClient ) Write ( ctx context . Context , name string , reader io . Reader , mode ... WriteMode ) error {
2024-07-18 14:17:42 +00:00
if w . readonly {
return ReadOnlyError { "write" }
}
2024-05-30 11:59:27 +00:00
return w . wsfs . Write ( ctx , name , reader , mode ... )
}
// Try to read the file as a regular file. If the file is not found, try to read it as a notebook.
2025-01-20 12:09:28 +00:00
func ( w * WorkspaceFilesExtensionsClient ) Read ( ctx context . Context , name string ) ( io . ReadCloser , error ) {
2024-11-18 17:25:24 +00:00
// Ensure that the file / notebook exists. We do this check here to avoid reading
// the content of a notebook called `foo` when the user actually wanted
// to read the content of a file called `foo`.
//
// To read the content of a notebook called `foo` in the workspace the user
// should use the name with the extension included like `foo.ipynb` or `foo.sql`.
_ , err := w . Stat ( ctx , name )
if err != nil {
return nil , err
}
2024-05-30 11:59:27 +00:00
r , err := w . wsfs . Read ( ctx , name )
// If the file is not found, it might be a notebook.
if errors . As ( err , & FileDoesNotExistError { } ) {
stat , serr := w . getNotebookStatByNameWithExt ( ctx , name )
if serr != nil {
// Unable to stat. Return the stat error.
return nil , serr
}
if stat == nil {
// Not a notebook. Return the original error.
return nil , err
}
// The workspace files filer performs an additional stat call to make sure
// the path is not a directory. We can skip this step since we already have
// the stat object and know that the path is a notebook.
return w . workspaceClient . Workspace . Download (
ctx ,
path . Join ( w . root , stat . nameForWorkspaceAPI ) ,
workspace . DownloadFormat ( stat . ReposExportFormat ) ,
)
}
return r , err
}
// Try to delete the file as a regular file. If the file is not found, try to delete it as a notebook.
2025-01-20 12:09:28 +00:00
func ( w * WorkspaceFilesExtensionsClient ) Delete ( ctx context . Context , name string , mode ... DeleteMode ) error {
2024-07-18 14:17:42 +00:00
if w . readonly {
return ReadOnlyError { "delete" }
}
2024-11-18 17:25:24 +00:00
// Ensure that the file / notebook exists. We do this check here to avoid
// deleting the a notebook called `foo` when the user actually wanted to
// delete a file called `foo`.
//
// To delete a notebook called `foo` in the workspace the user should use the
// name with the extension included like `foo.ipynb` or `foo.sql`.
_ , err := w . Stat ( ctx , name )
if err != nil {
return err
}
err = w . wsfs . Delete ( ctx , name , mode ... )
2024-05-30 11:59:27 +00:00
// If the file is not found, it might be a notebook.
if errors . As ( err , & FileDoesNotExistError { } ) {
stat , serr := w . getNotebookStatByNameWithExt ( ctx , name )
if serr != nil {
// Unable to stat. Return the stat error.
return serr
}
if stat == nil {
// Not a notebook. Return the original error.
return err
}
return w . wsfs . Delete ( ctx , stat . nameForWorkspaceAPI , mode ... )
}
return err
}
// Try to stat the file as a regular file. If the file is not found, try to stat it as a notebook.
2025-01-20 12:09:28 +00:00
func ( w * WorkspaceFilesExtensionsClient ) Stat ( ctx context . Context , name string ) ( fs . FileInfo , error ) {
2024-05-30 11:59:27 +00:00
info , err := w . wsfs . Stat ( ctx , name )
// If the file is not found, it might be a notebook.
if errors . As ( err , & FileDoesNotExistError { } ) {
stat , serr := w . getNotebookStatByNameWithExt ( ctx , name )
if serr != nil {
// Unable to stat. Return the stat error.
return nil , serr
}
if stat == nil {
// Not a notebook. Return the original error.
return nil , err
}
2024-07-05 11:32:29 +00:00
return wsfsFileInfo { ObjectInfo : stat . ObjectInfo } , nil
2024-05-30 11:59:27 +00:00
}
2024-11-18 17:25:24 +00:00
if err != nil {
return nil , err
}
// If an object is found and it is a notebook, return a FileDoesNotExistError.
// If a notebook is found by the workspace files client, without having stripped
// the extension, this implies that no file with the same name exists.
//
// This check is done to avoid returning the stat for a notebook called `foo`
// when the user actually wanted to stat a file called `foo`.
//
// To stat the metadata of a notebook called `foo` in the workspace the user
// should use the name with the extension included like `foo.ipynb` or `foo.sql`.
if info . Sys ( ) . ( workspace . ObjectInfo ) . ObjectType == workspace . ObjectTypeNotebook {
return nil , FileDoesNotExistError { name }
}
return info , nil
2024-05-30 11:59:27 +00:00
}
// Note: The import API returns opaque internal errors for namespace clashes
// (e.g. a file and a notebook or a directory and a notebook). Thus users of this
// method should be careful to avoid such clashes.
2025-01-20 12:09:28 +00:00
func ( w * WorkspaceFilesExtensionsClient ) Mkdir ( ctx context . Context , name string ) error {
2024-07-18 14:17:42 +00:00
if w . readonly {
return ReadOnlyError { "mkdir" }
}
2024-05-30 11:59:27 +00:00
return w . wsfs . Mkdir ( ctx , name )
}