[Fix] Do not buffer files in memory when downloading (#1599)

## Changes

This PR fixes a performance bug that caused downloaded files (e.g. with `databricks fs cp dbfs:/Volumes/.../somefile .`) to be buffered entirely in memory before being written.

Results from profiling the download of a ~100MB file:

Before:
```
Type: alloc_space
Showing nodes accounting for 374.02MB, 98.50% of 379.74MB total
```

After:
```
Type: alloc_space
Showing nodes accounting for 3748.67kB, 100% of 3748.67kB total
```

Note that this fix is temporary. A longer-term solution should be to use the API provided by the Go SDK rather than making an HTTP request directly from the CLI.

Fixes #1575

## Tests

Verified that the CLI properly downloads the file when doing the profiling.
2024-07-17 09:14:02 +02:00 · 2024-07-17 09:14:02 +02:00 · 235973e7b1
parent 10fe02075f
commit 235973e7b1
1 changed file with 3 additions and 4 deletions
--- a/libs/filer/files_client.go
+++ b/libs/filer/files_client.go
@@ -1,7 +1,6 @@
 package filer

 import (
-	"bytes"
 	"context"
 	"errors"
 	"fmt"
@@ -179,12 +178,12 @@ func (w *FilesClient) Read(ctx context.Context, name string) (io.ReadCloser, err
 		return nil, err
 	}

-	var buf bytes.Buffer
-	err = w.apiClient.Do(ctx, http.MethodGet, urlPath, nil, nil, &buf)
+	var reader io.ReadCloser
+	err = w.apiClient.Do(ctx, http.MethodGet, urlPath, nil, nil, &reader)

 	// Return early on success.
 	if err == nil {
-		return io.NopCloser(&buf), nil
+		return reader, nil
 	}

 	// Special handling of this error only if it is an API error.