mirror of https://github.com/databricks/cli.git
Let notebook detection code use underlying metadata if available (#1574)
## Changes

If we're using a `vfs.Path` backed by a workspace filesystem filer, we have access to the `workspace.ObjectInfo` value for every file. By exposing this value, notebook detection can use it directly and avoid reading the first line of the underlying file.

A follow-up change will implement the interface defined in this change for the workspace filesystem filer.

## Tests

Unit tests.
This commit is contained in:
parent
5bc5c3c26a
commit
8f56ca39a2
|
@ -12,27 +12,69 @@ import (
|
||||||
"github.com/databricks/databricks-sdk-go/service/workspace"
|
"github.com/databricks/databricks-sdk-go/service/workspace"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// FileInfoWithWorkspaceObjectInfo is an interface implemented by [fs.FileInfo] values that
|
||||||
|
// contain a file's underlying [workspace.ObjectInfo].
|
||||||
|
//
|
||||||
|
// This may be the case when working with a [filer.Filer] backed by the workspace API.
|
||||||
|
// For these files we do not need to read a file's header to know if it is a notebook;
|
||||||
|
// we can use the [workspace.ObjectInfo] value directly.
|
||||||
|
type FileInfoWithWorkspaceObjectInfo interface {
|
||||||
|
WorkspaceObjectInfo() workspace.ObjectInfo
|
||||||
|
}
|
||||||
|
|
||||||
// Maximum length in bytes of the notebook header.
|
// Maximum length in bytes of the notebook header.
|
||||||
const headerLength = 32
|
const headerLength = 32
|
||||||
|
|
||||||
// readHeader reads the first N bytes from a file.
|
// file wraps an fs.File and implements a few helper methods such that
|
||||||
func readHeader(fsys fs.FS, name string) ([]byte, error) {
|
// they don't need to be inlined in the [DetectWithFS] function below.
|
||||||
|
type file struct {
|
||||||
|
f fs.File
|
||||||
|
}
|
||||||
|
|
||||||
|
func openFile(fsys fs.FS, name string) (*file, error) {
|
||||||
f, err := fsys.Open(name)
|
f, err := fsys.Open(name)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
defer f.Close()
|
return &file{f: f}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f file) close() error {
|
||||||
|
return f.f.Close()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f file) readHeader() (string, error) {
|
||||||
// Scan header line with some padding.
|
// Scan header line with some padding.
|
||||||
var buf = make([]byte, headerLength)
|
var buf = make([]byte, headerLength)
|
||||||
n, err := f.Read([]byte(buf))
|
n, err := f.f.Read([]byte(buf))
|
||||||
if err != nil && err != io.EOF {
|
if err != nil && err != io.EOF {
|
||||||
return nil, err
|
return "", err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Trim buffer to actual read bytes.
|
// Trim buffer to actual read bytes.
|
||||||
return buf[:n], nil
|
buf = buf[:n]
|
||||||
|
|
||||||
|
// Read the first line from the buffer.
|
||||||
|
scanner := bufio.NewScanner(bytes.NewReader(buf))
|
||||||
|
scanner.Scan()
|
||||||
|
return scanner.Text(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getObjectInfo returns the [workspace.ObjectInfo] for the file if it is
|
||||||
|
// part of the [fs.FileInfo] value returned by the [fs.Stat] call.
|
||||||
|
func (f file) getObjectInfo() (oi workspace.ObjectInfo, ok bool, err error) {
|
||||||
|
stat, err := f.f.Stat()
|
||||||
|
if err != nil {
|
||||||
|
return workspace.ObjectInfo{}, false, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use object info if available.
|
||||||
|
if i, ok := stat.(FileInfoWithWorkspaceObjectInfo); ok {
|
||||||
|
return i.WorkspaceObjectInfo(), true, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
return workspace.ObjectInfo{}, false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Detect returns whether the file at path is a Databricks notebook.
|
// Detect returns whether the file at path is a Databricks notebook.
|
||||||
|
@ -40,13 +82,27 @@ func readHeader(fsys fs.FS, name string) ([]byte, error) {
|
||||||
func DetectWithFS(fsys fs.FS, name string) (notebook bool, language workspace.Language, err error) {
|
func DetectWithFS(fsys fs.FS, name string) (notebook bool, language workspace.Language, err error) {
|
||||||
header := ""
|
header := ""
|
||||||
|
|
||||||
buf, err := readHeader(fsys, name)
|
f, err := openFile(fsys, name)
|
||||||
|
if err != nil {
|
||||||
|
return false, "", err
|
||||||
|
}
|
||||||
|
|
||||||
|
defer f.close()
|
||||||
|
|
||||||
|
// Use object info if available.
|
||||||
|
oi, ok, err := f.getObjectInfo()
|
||||||
|
if err != nil {
|
||||||
|
return false, "", err
|
||||||
|
}
|
||||||
|
if ok {
|
||||||
|
return oi.ObjectType == workspace.ObjectTypeNotebook, oi.Language, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the first line of the file.
|
||||||
|
fileHeader, err := f.readHeader()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, "", err
|
return false, "", err
|
||||||
}
|
}
|
||||||
scanner := bufio.NewScanner(bytes.NewReader(buf))
|
|
||||||
scanner.Scan()
|
|
||||||
fileHeader := scanner.Text()
|
|
||||||
|
|
||||||
// Determine which header to expect based on filename extension.
|
// Determine which header to expect based on filename extension.
|
||||||
ext := strings.ToLower(filepath.Ext(name))
|
ext := strings.ToLower(filepath.Ext(name))
|
||||||
|
|
|
@ -99,3 +99,21 @@ func TestDetectFileWithLongHeader(t *testing.T) {
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
assert.False(t, nb)
|
assert.False(t, nb)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDetectWithObjectInfo(t *testing.T) {
|
||||||
|
fakeFS := &fakeFS{
|
||||||
|
fakeFile{
|
||||||
|
fakeFileInfo{
|
||||||
|
workspace.ObjectInfo{
|
||||||
|
ObjectType: workspace.ObjectTypeNotebook,
|
||||||
|
Language: workspace.LanguagePython,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
nb, lang, err := DetectWithFS(fakeFS, "doesntmatter")
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.True(t, nb)
|
||||||
|
assert.Equal(t, workspace.LanguagePython, lang)
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,77 @@
|
||||||
|
package notebook
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io/fs"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/databricks/databricks-sdk-go/service/workspace"
|
||||||
|
)
|
||||||
|
|
||||||
|
type fakeFS struct {
|
||||||
|
fakeFile
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakeFile struct {
|
||||||
|
fakeFileInfo
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFile) Close() error {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFile) Read(p []byte) (n int, err error) {
|
||||||
|
return 0, fmt.Errorf("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFile) Stat() (fs.FileInfo, error) {
|
||||||
|
return f.fakeFileInfo, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
type fakeFileInfo struct {
|
||||||
|
oi workspace.ObjectInfo
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFileInfo) WorkspaceObjectInfo() workspace.ObjectInfo {
|
||||||
|
return f.oi
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFileInfo) Name() string {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFileInfo) Size() int64 {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFileInfo) Mode() fs.FileMode {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFileInfo) ModTime() time.Time {
|
||||||
|
return time.Time{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFileInfo) IsDir() bool {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFileInfo) Sys() any {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFS) Open(name string) (fs.File, error) {
|
||||||
|
return f.fakeFile, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFS) Stat(name string) (fs.FileInfo, error) {
|
||||||
|
panic("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFS) ReadDir(name string) ([]fs.DirEntry, error) {
|
||||||
|
panic("not implemented")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f fakeFS) ReadFile(name string) ([]byte, error) {
|
||||||
|
panic("not implemented")
|
||||||
|
}
|
Loading…
Reference in New Issue