package internal

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"io/fs"
	"path"
	"regexp"
	"strings"
	"testing"

	"github.com/databricks/cli/libs/filer"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

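// filerTest wraps a *testing.T and a filer.Filer so assertion helpers can be
// called directly on the value under test.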
type filerTest struct {
	*testing.T
	filer.Filer
}

func (f filerTest) assertContents(ctx context.Context, name string, contents string) {
	reader, err := f.Read(ctx, name)
	if !assert.NoError(f, err) {
		return
	}

	defer reader.Close()

	var body bytes.Buffer
	_, err = io.Copy(&body, reader)
	if !assert.NoError(f, err) {
		return
	}

	assert.Equal(f, contents, body.String())
}

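// assertContentsJupyter reads a Jupyter notebook from the filer and checks
// that it deserializes as JSON with the expected nbformat version.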
func (f filerTest) assertContentsJupyter(ctx context.Context, name string) {
	reader, err := f.Read(ctx, name)
	if !assert.NoError(f, err) {
		return
	}

	defer reader.Close()

	var body bytes.Buffer
	_, err = io.Copy(&body, reader)
	if !assert.NoError(f, err) {
		return
	}

	var actual map[string]any
	err = json.Unmarshal(body.Bytes(), &actual)
	if !assert.NoError(f, err) {
		return
	}

	// Since a roundtrip to the workspace changes a Jupyter notebook's payload,
	// the best we can do is assert that the nbformat is correct.
	assert.EqualValues(f, 4, actual["nbformat"])
}

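// assertNotExists stats the given path and expects fs.ErrNotExist.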
func (f filerTest) assertNotExists(ctx context.Context, name string) {
	_, err := f.Stat(ctx, name)
	assert.ErrorIs(f, err, fs.ErrNotExist)
}

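// commonFilerRecursiveDeleteTest exercises recursive deletion behavior that
// all filer implementations are expected to share.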
func commonFilerRecursiveDeleteTest(t *testing.T, ctx context.Context, f filer.Filer) {
	var err error

	err = f.Write(ctx, "dir/file1", strings.NewReader("content1"), filer.CreateParentDirectories)
	require.NoError(t, err)
	filerTest{t, f}.assertContents(ctx, "dir/file1", `content1`)

	err = f.Write(ctx, "dir/file2", strings.NewReader("content2"), filer.CreateParentDirectories)
	require.NoError(t, err)
	filerTest{t, f}.assertContents(ctx, "dir/file2", `content2`)

	err = f.Write(ctx, "dir/subdir1/file3", strings.NewReader("content3"), filer.CreateParentDirectories)
	require.NoError(t, err)
	filerTest{t, f}.assertContents(ctx, "dir/subdir1/file3", `content3`)

	err = f.Write(ctx, "dir/subdir1/file4", strings.NewReader("content4"), filer.CreateParentDirectories)
	require.NoError(t, err)
	filerTest{t, f}.assertContents(ctx, "dir/subdir1/file4", `content4`)

	err = f.Write(ctx, "dir/subdir2/file5", strings.NewReader("content5"), filer.CreateParentDirectories)
	require.NoError(t, err)
	filerTest{t, f}.assertContents(ctx, "dir/subdir2/file5", `content5`)

	err = f.Write(ctx, "dir/subdir2/file6", strings.NewReader("content6"), filer.CreateParentDirectories)
	require.NoError(t, err)
	filerTest{t, f}.assertContents(ctx, "dir/subdir2/file6", `content6`)

	entriesBeforeDelete, err := f.ReadDir(ctx, "dir")
	require.NoError(t, err)
	assert.Len(t, entriesBeforeDelete, 4)

	names := []string{}
	for _, e := range entriesBeforeDelete {
		names = append(names, e.Name())
	}
	assert.Equal(t, names, []string{"file1", "file2", "subdir1", "subdir2"})

	err = f.Delete(ctx, "dir")
	assert.ErrorAs(t, err, &filer.DirectoryNotEmptyError{})

	err = f.Delete(ctx, "dir", filer.DeleteRecursively)
	assert.NoError(t, err)

	_, err = f.ReadDir(ctx, "dir")
	assert.ErrorAs(t, err, &filer.NoSuchDirectoryError{})
}

func TestAccFilerRecursiveDelete(t *testing.T) {
	t.Parallel()

	for _, testCase := range []struct {
		name string
		f    func(t *testing.T) (filer.Filer, string)
	}{
		{"local", setupLocalFiler},
		{"workspace files", setupWsfsFiler},
		{"dbfs", setupDbfsFiler},
		{"files", setupUcVolumesFiler},
		{"workspace files extensions", setupWsfsExtensionsFiler},
	} {
		tc := testCase
		t.Run(testCase.name, func(t *testing.T) {
			t.Parallel()
			f, _ := tc.f(t)
			ctx := context.Background()

			// Common tests we run across all filers to ensure consistent behavior.
			commonFilerRecursiveDeleteTest(t, ctx, f)
		})
	}
}

// Common tests we run across all filers to ensure consistent behavior.
func commonFilerReadWriteTests(t *testing.T, ctx context.Context, f filer.Filer) {
	var err error

	// Write should fail because the intermediate directory doesn't exist.
	err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello world`))
	assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{}))
	assert.True(t, errors.Is(err, fs.ErrNotExist))

	// Read should fail because the intermediate directory doesn't yet exist.
	_, err = f.Read(ctx, "/foo/bar")
	assert.True(t, errors.As(err, &filer.FileDoesNotExistError{}))
	assert.True(t, errors.Is(err, fs.ErrNotExist))

	// Read should fail because the path points to a directory.
	err = f.Mkdir(ctx, "/dir")
	require.NoError(t, err)
	_, err = f.Read(ctx, "/dir")
	assert.ErrorIs(t, err, fs.ErrInvalid)

	// Write with CreateParentDirectories flag should succeed.
	err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello world`), filer.CreateParentDirectories)
	assert.NoError(t, err)
	filerTest{t, f}.assertContents(ctx, "/foo/bar", `hello world`)

	// Write should fail because there is an existing file at the specified path.
	err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello universe`))
	assert.True(t, errors.As(err, &filer.FileAlreadyExistsError{}))
	assert.True(t, errors.Is(err, fs.ErrExist))

	// Write with OverwriteIfExists should succeed.
	err = f.Write(ctx, "/foo/bar", strings.NewReader(`hello universe`), filer.OverwriteIfExists)
	assert.NoError(t, err)
	filerTest{t, f}.assertContents(ctx, "/foo/bar", `hello universe`)

	// Write should succeed if there is no existing file at the specified path.
	err = f.Write(ctx, "/foo/qux", strings.NewReader(`hello universe`))
	assert.NoError(t, err)

	// Stat on a directory should succeed.
	// Note: size and modification time behave differently between backends.
	info, err := f.Stat(ctx, "/foo")
	require.NoError(t, err)
	assert.Equal(t, "foo", info.Name())
	assert.True(t, info.Mode().IsDir())
	assert.Equal(t, true, info.IsDir())

	// Stat on a file should succeed.
	// Note: size and modification time behave differently between backends.
	info, err = f.Stat(ctx, "/foo/bar")
	require.NoError(t, err)
	assert.Equal(t, "bar", info.Name())
	assert.True(t, info.Mode().IsRegular())
	assert.Equal(t, false, info.IsDir())

	// Delete should fail if the file doesn't exist.
	err = f.Delete(ctx, "/doesnt_exist")
	assert.ErrorAs(t, err, &filer.FileDoesNotExistError{})
	assert.True(t, errors.Is(err, fs.ErrNotExist))

	// Stat should fail if the file doesn't exist.
	_, err = f.Stat(ctx, "/doesnt_exist")
	assert.ErrorAs(t, err, &filer.FileDoesNotExistError{})
	assert.True(t, errors.Is(err, fs.ErrNotExist))

	// Delete should succeed for a file that does exist.
	err = f.Delete(ctx, "/foo/bar")
	assert.NoError(t, err)

	// Delete should fail for a non-empty directory.
	err = f.Delete(ctx, "/foo")
	assert.ErrorAs(t, err, &filer.DirectoryNotEmptyError{})
	assert.True(t, errors.Is(err, fs.ErrInvalid))

	// Delete should succeed for a non-empty directory if the DeleteRecursively flag is set.
	err = f.Delete(ctx, "/foo", filer.DeleteRecursively)
	assert.NoError(t, err)

	// Delete of the filer root should ALWAYS fail, otherwise subsequent writes would fail.
	// It is not in the filer's purview to delete its root directory.
	err = f.Delete(ctx, "/")
	assert.True(t, errors.As(err, &filer.CannotDeleteRootError{}))
	assert.True(t, errors.Is(err, fs.ErrInvalid))
}

func TestAccFilerReadWrite(t *testing.T) {
	t.Parallel()

	for _, testCase := range []struct {
		name string
		f    func(t *testing.T) (filer.Filer, string)
	}{
		{"local", setupLocalFiler},
		{"workspace files", setupWsfsFiler},
		{"dbfs", setupDbfsFiler},
		{"files", setupUcVolumesFiler},
		{"workspace files extensions", setupWsfsExtensionsFiler},
	} {
		tc := testCase
		t.Run(testCase.name, func(t *testing.T) {
			t.Parallel()
			f, _ := tc.f(t)
			ctx := context.Background()

			// Common tests we run across all filers to ensure consistent behavior.
			commonFilerReadWriteTests(t, ctx, f)
		})
	}
}

// Common tests we run across all filers to ensure consistent behavior.
func commonFilerReadDirTest(t *testing.T, ctx context.Context, f filer.Filer) {
	var err error
	var info fs.FileInfo

	// We start with an empty directory.
	entries, err := f.ReadDir(ctx, ".")
	require.NoError(t, err)
	assert.Len(t, entries, 0)

	// Write a file.
	err = f.Write(ctx, "/hello.txt", strings.NewReader(`hello world`))
	require.NoError(t, err)

	// Create a directory.
	err = f.Mkdir(ctx, "/dir")
	require.NoError(t, err)

	// Write a file.
	err = f.Write(ctx, "/dir/world.txt", strings.NewReader(`hello world`))
	require.NoError(t, err)

	// Create a nested directory (check that it creates intermediate directories).
	err = f.Mkdir(ctx, "/dir/a/b/c")
	require.NoError(t, err)

	// Expect an error if the path doesn't exist.
	_, err = f.ReadDir(ctx, "/dir/a/b/c/d/e")
	assert.True(t, errors.As(err, &filer.NoSuchDirectoryError{}), err)
	assert.True(t, errors.Is(err, fs.ErrNotExist))

	// Expect two entries in the root.
	entries, err = f.ReadDir(ctx, ".")
	require.NoError(t, err)
	assert.Len(t, entries, 2)
	assert.Equal(t, "dir", entries[0].Name())
	assert.True(t, entries[0].IsDir())
	assert.Equal(t, "hello.txt", entries[1].Name())
	assert.False(t, entries[1].IsDir())
	info, err = entries[1].Info()
	require.NoError(t, err)
	assert.Greater(t, info.ModTime().Unix(), int64(0))

	// Expect two entries in the directory.
	entries, err = f.ReadDir(ctx, "/dir")
	require.NoError(t, err)
	assert.Len(t, entries, 2)
	assert.Equal(t, "a", entries[0].Name())
	assert.True(t, entries[0].IsDir())
	assert.Equal(t, "world.txt", entries[1].Name())
	assert.False(t, entries[1].IsDir())
	info, err = entries[1].Info()
	require.NoError(t, err)
	assert.Greater(t, info.ModTime().Unix(), int64(0))

	// Expect a single entry in the nested path.
	entries, err = f.ReadDir(ctx, "/dir/a/b")
	require.NoError(t, err)
	assert.Len(t, entries, 1)
	assert.Equal(t, "c", entries[0].Name())
	assert.True(t, entries[0].IsDir())

	// Expect an error trying to call ReadDir on a file.
	_, err = f.ReadDir(ctx, "/hello.txt")
	assert.ErrorIs(t, err, fs.ErrInvalid)

	// Expect 0 entries for an empty directory.
	err = f.Mkdir(ctx, "empty-dir")
	require.NoError(t, err)
	entries, err = f.ReadDir(ctx, "empty-dir")
	assert.NoError(t, err)
	assert.Len(t, entries, 0)

	// Expect one entry for a directory with a file in it.
	err = f.Write(ctx, "dir-with-one-file/my-file.txt", strings.NewReader("abc"), filer.CreateParentDirectories)
	require.NoError(t, err)
	entries, err = f.ReadDir(ctx, "dir-with-one-file")
	assert.NoError(t, err)
	assert.Len(t, entries, 1)
	assert.Equal(t, entries[0].Name(), "my-file.txt")
	assert.False(t, entries[0].IsDir())
}

func TestAccFilerReadDir(t *testing.T) {
	t.Parallel()

	for _, testCase := range []struct {
		name string
		f    func(t *testing.T) (filer.Filer, string)
	}{
		{"local", setupLocalFiler},
		{"workspace files", setupWsfsFiler},
		{"dbfs", setupDbfsFiler},
		{"files", setupUcVolumesFiler},
		{"workspace files extensions", setupWsfsExtensionsFiler},
	} {
		tc := testCase
		t.Run(testCase.name, func(t *testing.T) {
			t.Parallel()
			f, _ := tc.f(t)
			ctx := context.Background()

			commonFilerReadDirTest(t, ctx, f)
		})
	}
}

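// Minimal Jupyter (nbformat 4) notebooks used as fixtures for the notebook
// import/export tests below.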
var jupyterNotebookContent1 = `
{
	"cells": [
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {},
			"outputs": [],
			"source": [
				"print(\"Jupyter Notebook Version 1\")"
			]
		}
	],
	"metadata": {
		"language_info": {
			"name": "python"
		},
		"orig_nbformat": 4
	},
	"nbformat": 4,
	"nbformat_minor": 2
}
`

var jupyterNotebookContent2 = `
{
	"cells": [
		{
			"cell_type": "code",
			"execution_count": null,
			"metadata": {},
			"outputs": [],
			"source": [
				"print(\"Jupyter Notebook Version 2\")"
			]
		}
	],
	"metadata": {
		"language_info": {
			"name": "python"
		},
		"orig_nbformat": 4
	},
	"nbformat": 4,
	"nbformat_minor": 2
}
`

func TestAccFilerWorkspaceNotebookConflict(t *testing.T) {
	t.Parallel()

	f, _ := setupWsfsFiler(t)
	ctx := context.Background()
	var err error

	// Upload the notebooks
	err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('first upload'))"))
	require.NoError(t, err)
	err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('first upload'))"))
	require.NoError(t, err)
	err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"first upload\""))
	require.NoError(t, err)
	err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))"))
	require.NoError(t, err)
	err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1))
	require.NoError(t, err)

	// Assert contents after initial upload
	filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('first upload'))")
	filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))")
	filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"")
	filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))")
	filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")")

	// Assert uploading a second time fails due to overwrite mode missing
	err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))"))
	assert.ErrorIs(t, err, fs.ErrExist)
	assert.Regexp(t, regexp.MustCompile(`file already exists: .*/pyNb$`), err.Error())

	err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('second upload'))"))
	assert.ErrorIs(t, err, fs.ErrExist)
	assert.Regexp(t, regexp.MustCompile(`file already exists: .*/rNb$`), err.Error())

	err = f.Write(ctx, "sqlNb.sql", strings.NewReader("# Databricks notebook source\n SELECT \"second upload\")"))
	assert.ErrorIs(t, err, fs.ErrExist)
	assert.Regexp(t, regexp.MustCompile(`file already exists: .*/sqlNb$`), err.Error())

	err = f.Write(ctx, "scalaNb.scala", strings.NewReader("# Databricks notebook source\n println(\"second upload\"))"))
	assert.ErrorIs(t, err, fs.ErrExist)
	assert.Regexp(t, regexp.MustCompile(`file already exists: .*/scalaNb$`), err.Error())

	err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2))
	assert.ErrorIs(t, err, fs.ErrExist)
	assert.Regexp(t, regexp.MustCompile(`file already exists: .*/jupyterNb$`), err.Error())
}

func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) {
	t.Parallel()

	f, _ := setupWsfsFiler(t)
	ctx := context.Background()
	var err error

	// Upload notebooks
	err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('first upload'))"))
	require.NoError(t, err)
	err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('first upload'))"))
	require.NoError(t, err)
	err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"first upload\""))
	require.NoError(t, err)
	err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))"))
	require.NoError(t, err)
	err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1))
	require.NoError(t, err)

	// Assert contents after initial upload
	filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('first upload'))")
	filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))")
	filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"")
	filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))")
	filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")")

	// Upload notebooks a second time, overwriting the initial uploads
	err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))"), filer.OverwriteIfExists)
	require.NoError(t, err)
	err = f.Write(ctx, "rNb.r", strings.NewReader("# Databricks notebook source\nprint('second upload'))"), filer.OverwriteIfExists)
	require.NoError(t, err)
	err = f.Write(ctx, "sqlNb.sql", strings.NewReader("-- Databricks notebook source\n SELECT \"second upload\""), filer.OverwriteIfExists)
	require.NoError(t, err)
	err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"second upload\"))"), filer.OverwriteIfExists)
	require.NoError(t, err)
	err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2), filer.OverwriteIfExists)
	require.NoError(t, err)

	// Assert contents have been overwritten
	filerTest{t, f}.assertContents(ctx, "pyNb", "# Databricks notebook source\nprint('second upload'))")
	filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('second upload'))")
	filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"second upload\"")
	filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"second upload\"))")
	filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 2\")")
}

func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) {
	t.Parallel()

	files := []struct {
		name    string
		content string
	}{
		{"dir1/dir2/dir3/file.txt", "file content"},
		{"dir1/notebook.py", "# Databricks notebook source\nprint('first upload'))"},
		{"foo.py", "print('foo')"},
		{"foo.r", "print('foo')"},
		{"foo.scala", "println('foo')"},
		{"foo.sql", "SELECT 'foo'"},
		{"jupyterNb.ipynb", jupyterNotebookContent1},
		{"jupyterNb2.ipynb", jupyterNotebookContent2},
		{"pyNb.py", "# Databricks notebook source\nprint('first upload'))"},
		{"rNb.r", "# Databricks notebook source\nprint('first upload'))"},
		{"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"},
		{"sqlNb.sql", "-- Databricks notebook source\n SELECT \"first upload\""},
	}

	// Assert that every file has a unique basename
	basenames := map[string]struct{}{}
	for _, f := range files {
		basename := path.Base(f.name)
		if _, ok := basenames[basename]; ok {
			t.Fatalf("basename %s is not unique", basename)
		}
		basenames[basename] = struct{}{}
	}

	ctx := context.Background()
	wf, _ := setupWsfsExtensionsFiler(t)

	for _, f := range files {
		err := wf.Write(ctx, f.name, strings.NewReader(f.content), filer.CreateParentDirectories)
		require.NoError(t, err)
	}

	// Read entries
	entries, err := wf.ReadDir(ctx, ".")
	require.NoError(t, err)
	names := []string{}
	for _, e := range entries {
		names = append(names, e.Name())
	}
	assert.Equal(t, []string{
		"dir1",
		"foo.py",
		"foo.r",
		"foo.scala",
		"foo.sql",
		"jupyterNb.ipynb",
		"jupyterNb2.ipynb",
		"pyNb.py",
		"rNb.r",
		"scalaNb.scala",
		"sqlNb.sql",
	}, names)

	// Read entries in subdirectory
	entries, err = wf.ReadDir(ctx, "dir1")
	require.NoError(t, err)
	names = []string{}
	for _, e := range entries {
		names = append(names, e.Name())
	}
	assert.Equal(t, []string{
		"dir2",
		"notebook.py",
	}, names)
}

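// setupFilerWithExtensionsTest creates a workspace-files-extensions filer and
// seeds it with a mix of notebooks, regular files, and a directory.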
func setupFilerWithExtensionsTest(t *testing.T) filer.Filer {
	files := []struct {
		name    string
		content string
	}{
		{"foo.py", "# Databricks notebook source\nprint('first upload'))"},
		{"bar.py", "print('foo')"},
		{"jupyter.ipynb", jupyterNotebookContent1},
		{"pretender", "not a notebook"},
		{"dir/file.txt", "file content"},
		{"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"},
	}

	ctx := context.Background()
	wf, _ := setupWsfsExtensionsFiler(t)
	for _, f := range files {
		err := wf.Write(ctx, f.name, strings.NewReader(f.content), filer.CreateParentDirectories)
		require.NoError(t, err)
	}

	return wf
}

func TestAccFilerWorkspaceFilesExtensionsRead(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	wf := setupFilerWithExtensionsTest(t)

	// Read contents of test fixtures as a sanity check.
	filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('first upload'))")
	filerTest{t, wf}.assertContents(ctx, "bar.py", "print('foo')")
	filerTest{t, wf}.assertContentsJupyter(ctx, "jupyter.ipynb")
	filerTest{t, wf}.assertContents(ctx, "dir/file.txt", "file content")
	filerTest{t, wf}.assertContents(ctx, "scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')")
	filerTest{t, wf}.assertContents(ctx, "pretender", "not a notebook")

	// Read non-existent file
	_, err := wf.Read(ctx, "non-existent.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)

	// Ensure we do not read a regular file as a notebook
	_, err = wf.Read(ctx, "pretender.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)
	_, err = wf.Read(ctx, "pretender.ipynb")
	assert.ErrorIs(t, err, fs.ErrNotExist)

	// Read directory
	_, err = wf.Read(ctx, "dir")
	assert.ErrorIs(t, err, fs.ErrInvalid)

	// Ensure we do not read a Scala notebook as a Python notebook
	_, err = wf.Read(ctx, "scala-notebook.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)
}

func TestAccFilerWorkspaceFilesExtensionsDelete(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	wf := setupFilerWithExtensionsTest(t)

	// Delete notebook
	err := wf.Delete(ctx, "foo.py")
	require.NoError(t, err)
	filerTest{t, wf}.assertNotExists(ctx, "foo.py")

	// Delete file
	err = wf.Delete(ctx, "bar.py")
	require.NoError(t, err)
	filerTest{t, wf}.assertNotExists(ctx, "bar.py")

	// Delete jupyter notebook
	err = wf.Delete(ctx, "jupyter.ipynb")
	require.NoError(t, err)
	filerTest{t, wf}.assertNotExists(ctx, "jupyter.ipynb")

	// Delete non-existent file
	err = wf.Delete(ctx, "non-existent.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)

	// Ensure we do not delete a file as a notebook
	err = wf.Delete(ctx, "pretender.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)

	// Ensure we do not delete a Scala notebook as a Python notebook
	err = wf.Delete(ctx, "scala-notebook.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)

	// Delete directory
	err = wf.Delete(ctx, "dir")
	assert.ErrorIs(t, err, fs.ErrInvalid)

	// Delete directory recursively
	err = wf.Delete(ctx, "dir", filer.DeleteRecursively)
	require.NoError(t, err)
	filerTest{t, wf}.assertNotExists(ctx, "dir")
}

func TestAccFilerWorkspaceFilesExtensionsStat(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	wf := setupFilerWithExtensionsTest(t)

	// Stat on a notebook
	info, err := wf.Stat(ctx, "foo.py")
	require.NoError(t, err)
	assert.Equal(t, "foo.py", info.Name())
	assert.False(t, info.IsDir())

	// Stat on a file
	info, err = wf.Stat(ctx, "bar.py")
	require.NoError(t, err)
	assert.Equal(t, "bar.py", info.Name())
	assert.False(t, info.IsDir())

	// Stat on a Jupyter notebook
	info, err = wf.Stat(ctx, "jupyter.ipynb")
	require.NoError(t, err)
	assert.Equal(t, "jupyter.ipynb", info.Name())
	assert.False(t, info.IsDir())

	// Stat on a directory
	info, err = wf.Stat(ctx, "dir")
	require.NoError(t, err)
	assert.Equal(t, "dir", info.Name())
	assert.True(t, info.IsDir())

	// Stat on a non-existent file
	_, err = wf.Stat(ctx, "non-existent.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)

	// Ensure we do not stat a file as a notebook
	_, err = wf.Stat(ctx, "pretender.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)

	// Ensure we do not stat a Scala notebook as a Python notebook
	_, err = wf.Stat(ctx, "scala-notebook.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)

	_, err = wf.Stat(ctx, "pretender.ipynb")
	assert.ErrorIs(t, err, fs.ErrNotExist)
}

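// ReadDir should fail when a notebook and a regular file resolve to the same
// name once the notebook's language extension is applied.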
func TestAccFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) {
	t.Parallel()

	tcases := []struct {
		files []struct{ name, content string }
		name  string
	}{
		{
			name: "python",
			files: []struct{ name, content string }{
				{"foo.py", "print('foo')"},
				{"foo.py", "# Databricks notebook source\nprint('foo')"},
			},
		},
		{
			name: "r",
			files: []struct{ name, content string }{
				{"foo.r", "print('foo')"},
				{"foo.r", "# Databricks notebook source\nprint('foo')"},
			},
		},
		{
			name: "sql",
			files: []struct{ name, content string }{
				{"foo.sql", "SELECT 'foo'"},
				{"foo.sql", "-- Databricks notebook source\nSELECT 'foo'"},
			},
		},
		{
			name: "scala",
			files: []struct{ name, content string }{
				{"foo.scala", "println('foo')"},
				{"foo.scala", "// Databricks notebook source\nprintln('foo')"},
			},
		},
		// We don't need to test this for ipynb notebooks. The import API
		// fails when the file extension is .ipynb but the content is not a
		// valid Jupyter notebook.
	}

	for i := range tcases {
		tc := tcases[i]
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()

			ctx := context.Background()
			wf, tmpDir := setupWsfsExtensionsFiler(t)

			for _, f := range tc.files {
				err := wf.Write(ctx, f.name, strings.NewReader(f.content), filer.CreateParentDirectories)
				require.NoError(t, err)
			}

			_, err := wf.ReadDir(ctx, ".")
			assert.ErrorAs(t, err, &filer.DuplicatePathError{})
			assert.ErrorContains(t, err, fmt.Sprintf("failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at %s and FILE at %s resolve to the same name %s. Changing the name of one of these objects will resolve this issue", path.Join(tmpDir, "foo"), path.Join(tmpDir, tc.files[0].name), tc.files[0].name))
		})
	}
}

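// Directories must never be surfaced as notebooks, even when the requested
// path carries a notebook extension.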
func TestAccWorkspaceFilesExtensionsDirectoriesAreNotNotebooks(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	wf, _ := setupWsfsExtensionsFiler(t)

	// Create a directory.
	err := wf.Mkdir(ctx, "foo")
	require.NoError(t, err)

	// Reading foo.py should fail. foo is a directory, not a notebook.
	_, err = wf.Read(ctx, "foo.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)
}

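// A notebook's export format (source vs. Jupyter) is preserved: it is only
// addressable under the extension it was written with.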
func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	wf, _ := setupWsfsExtensionsFiler(t)

	// Case 1: Source Notebook
	err := wf.Write(ctx, "foo.py", strings.NewReader("# Databricks notebook source\nprint('foo')"))
	require.NoError(t, err)

	// The source notebook should exist but not the Jupyter notebook
	filerTest{t, wf}.assertContents(ctx, "foo.py", "# Databricks notebook source\nprint('foo')")
	_, err = wf.Stat(ctx, "foo.ipynb")
	assert.ErrorIs(t, err, fs.ErrNotExist)
	_, err = wf.Read(ctx, "foo.ipynb")
	assert.ErrorIs(t, err, fs.ErrNotExist)
	err = wf.Delete(ctx, "foo.ipynb")
	assert.ErrorIs(t, err, fs.ErrNotExist)

	// Case 2: Jupyter Notebook
	err = wf.Write(ctx, "bar.ipynb", strings.NewReader(jupyterNotebookContent1))
	require.NoError(t, err)

	// The Jupyter notebook should exist but not the source notebook
	filerTest{t, wf}.assertContentsJupyter(ctx, "bar.ipynb")
	_, err = wf.Stat(ctx, "bar.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)
	_, err = wf.Read(ctx, "bar.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)
	err = wf.Delete(ctx, "bar.py")
	assert.ErrorIs(t, err, fs.ErrNotExist)
}