2022-12-12 13:31:06 +00:00
|
|
|
package repofiles
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2023-02-20 15:00:20 +00:00
|
|
|
"errors"
|
2022-12-12 13:31:06 +00:00
|
|
|
"fmt"
|
2023-06-06 00:00:30 +00:00
|
|
|
"net/http"
|
|
|
|
"net/url"
|
2022-12-12 13:31:06 +00:00
|
|
|
"os"
|
|
|
|
"path"
|
|
|
|
"path/filepath"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/databricks/databricks-sdk-go"
|
|
|
|
"github.com/databricks/databricks-sdk-go/apierr"
|
2023-06-06 00:00:30 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go/client"
|
2022-12-12 13:31:06 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go/service/workspace"
|
|
|
|
)
|
|
|
|
|
2023-06-01 09:23:31 +00:00
|
|
|
// RepoFileOptions holds the settings that control how files are written to
// the remote repo.
type RepoFileOptions struct {
	// OverwriteIfExists is sent as the "overwrite" query parameter of the
	// file import request.
	OverwriteIfExists bool
}
|
|
|
|
|
2022-12-12 13:31:06 +00:00
|
|
|
// RepoFiles wraps reading and writing into a remote repo with safeguards to prevent
|
2023-06-06 00:00:30 +00:00
|
|
|
// accidental deletion of repos and more robust methods to overwrite workspace files
|
2022-12-12 13:31:06 +00:00
|
|
|
type RepoFiles struct {
|
2023-06-01 09:23:31 +00:00
|
|
|
*RepoFileOptions
|
|
|
|
|
2022-12-12 13:31:06 +00:00
|
|
|
repoRoot string
|
|
|
|
localRoot string
|
|
|
|
workspaceClient *databricks.WorkspaceClient
|
|
|
|
}
|
|
|
|
|
2023-06-06 00:00:30 +00:00
|
|
|
func Create(repoRoot, localRoot string, workspaceClient *databricks.WorkspaceClient) *RepoFiles {
|
2022-12-12 13:31:06 +00:00
|
|
|
return &RepoFiles{
|
|
|
|
repoRoot: repoRoot,
|
|
|
|
localRoot: localRoot,
|
2023-06-06 00:00:30 +00:00
|
|
|
workspaceClient: workspaceClient,
|
|
|
|
}
|
2022-12-12 13:31:06 +00:00
|
|
|
}
|
|
|
|
|
2023-06-05 16:45:21 +00:00
|
|
|
func (r *RepoFiles) remotePath(relativePath string) (string, error) {
|
2022-12-12 13:31:06 +00:00
|
|
|
fullPath := path.Join(r.repoRoot, relativePath)
|
|
|
|
cleanFullPath := path.Clean(fullPath)
|
|
|
|
if !strings.HasPrefix(cleanFullPath, r.repoRoot) {
|
|
|
|
return "", fmt.Errorf("relative file path is not inside repo root: %s", relativePath)
|
|
|
|
}
|
|
|
|
// path.Clean will remove any trailing / so it's enough to check cleanFullPath == r.repoRoot
|
|
|
|
if cleanFullPath == r.repoRoot {
|
|
|
|
return "", fmt.Errorf("file path relative to repo root cannot be empty: %s", relativePath)
|
|
|
|
}
|
|
|
|
return cleanFullPath, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *RepoFiles) readLocal(relativePath string) ([]byte, error) {
|
|
|
|
localPath := filepath.Join(r.localRoot, relativePath)
|
|
|
|
return os.ReadFile(localPath)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *RepoFiles) writeRemote(ctx context.Context, relativePath string, content []byte) error {
|
2023-06-06 00:00:30 +00:00
|
|
|
apiClientConfig := r.workspaceClient.Config
|
|
|
|
apiClientConfig.HTTPTimeoutSeconds = 600
|
|
|
|
apiClient, err := client.New(apiClientConfig)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2022-12-12 13:31:06 +00:00
|
|
|
}
|
2023-06-06 00:00:30 +00:00
|
|
|
remotePath, err := r.remotePath(relativePath)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
escapedPath := url.PathEscape(strings.TrimLeft(remotePath, "/"))
|
|
|
|
apiPath := fmt.Sprintf("/api/2.0/workspace-files/import-file/%s?overwrite=%t", escapedPath, r.OverwriteIfExists)
|
|
|
|
|
|
|
|
err = apiClient.Do(ctx, http.MethodPost, apiPath, content, nil)
|
|
|
|
|
|
|
|
// Handling some edge cases when an upload might fail
|
|
|
|
//
|
|
|
|
// We cannot do more precise error scoping here because the API does not
|
|
|
|
// provide descriptive errors yet
|
|
|
|
//
|
|
|
|
// TODO: narrow down the error condition scope of this "if" block to only
|
|
|
|
// trigger for the specific edge cases instead of all errors once the API
|
|
|
|
// implements them
|
2022-12-12 13:31:06 +00:00
|
|
|
if err != nil {
|
|
|
|
// Delete any artifact files incase non overwriteable by the current file
|
|
|
|
// type and thus are failing the PUT request.
|
|
|
|
// files, folders and notebooks might not have been cleaned up and they
|
|
|
|
// can't overwrite each other. If a folder `foo` exists, then attempts to
|
|
|
|
// PUT a file `foo` will fail
|
2023-06-06 00:00:30 +00:00
|
|
|
err := r.workspaceClient.Workspace.Delete(ctx,
|
2022-12-12 13:31:06 +00:00
|
|
|
workspace.Delete{
|
|
|
|
Path: remotePath,
|
|
|
|
Recursive: true,
|
|
|
|
},
|
|
|
|
)
|
|
|
|
// ignore RESOURCE_DOES_NOT_EXIST here incase nothing existed at remotePath
|
2023-02-20 15:00:20 +00:00
|
|
|
var aerr *apierr.APIError
|
|
|
|
if errors.As(err, &aerr) && aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
|
2022-12-12 13:31:06 +00:00
|
|
|
err = nil
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-06-06 00:00:30 +00:00
|
|
|
// Mkdir parent dirs incase they are what's causing the PUT request to
|
|
|
|
// fail
|
|
|
|
err = r.workspaceClient.Workspace.MkdirsByPath(ctx, path.Dir(remotePath))
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("could not mkdir to put file: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Attempt to upload file again after cleanup/setup
|
|
|
|
err = apiClient.Do(ctx, http.MethodPost, apiPath, content, nil)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-12-12 13:31:06 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *RepoFiles) deleteRemote(ctx context.Context, relativePath string) error {
|
2023-06-06 00:00:30 +00:00
|
|
|
remotePath, err := r.remotePath(relativePath)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return r.workspaceClient.Workspace.Delete(ctx,
|
|
|
|
workspace.Delete{
|
|
|
|
Path: remotePath,
|
|
|
|
Recursive: false,
|
|
|
|
},
|
|
|
|
)
|
2022-12-12 13:31:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// The API calls for a python script foo.py would be
|
|
|
|
// `PUT foo.py`
|
|
|
|
// `DELETE foo.py`
|
|
|
|
//
|
|
|
|
// The API calls for a python notebook foo.py would be
|
|
|
|
// `PUT foo.py`
|
|
|
|
// `DELETE foo`
|
|
|
|
//
|
|
|
|
// The workspace file system backend strips .py from the file name if the python
|
|
|
|
// file is a notebook
|
|
|
|
func (r *RepoFiles) PutFile(ctx context.Context, relativePath string) error {
|
|
|
|
content, err := r.readLocal(relativePath)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return r.writeRemote(ctx, relativePath, content)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *RepoFiles) DeleteFile(ctx context.Context, relativePath string) error {
|
|
|
|
err := r.deleteRemote(ctx, relativePath)
|
|
|
|
|
|
|
|
// We explictly ignore RESOURCE_DOES_NOT_EXIST error to make delete idempotent
|
2023-02-20 15:00:20 +00:00
|
|
|
var aerr *apierr.APIError
|
|
|
|
if errors.As(err, &aerr) && aerr.ErrorCode == "RESOURCE_DOES_NOT_EXIST" {
|
|
|
|
err = nil
|
2022-12-12 13:31:06 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2023-06-06 00:00:30 +00:00
|
|
|
|
|
|
|
// TODO: write integration tests for all non happy path cases that rely on
|
|
|
|
// specific behaviour of the workspace apis
|