From 235973e7b19db5b1418ce24f9a280c769b991dd7 Mon Sep 17 00:00:00 2001 From: Renaud Hartert Date: Wed, 17 Jul 2024 09:14:02 +0200 Subject: [PATCH] [Fix] Do not buffer files in memory when downloading (#1599) ## Changes This PR fixes a performance bug that led downloaded files (e.g. with `databricks fs cp dbfs:/Volumes/.../somefile .`) to be buffered in memory before being written. Results from profiling the download of a ~100MB file: Before: ``` Type: alloc_space Showing nodes accounting for 374.02MB, 98.50% of 379.74MB total ``` After: ``` Type: alloc_space Showing nodes accounting for 3748.67kB, 100% of 3748.67kB total ``` Note that this fix is temporary. A longer-term solution would be to use the API provided by the Go SDK rather than making an HTTP request directly from the CLI. fix #1575 ## Tests Verified that the CLI properly downloads the file when doing the profiling. --- libs/filer/files_client.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libs/filer/files_client.go b/libs/filer/files_client.go index 9fc68bd56..7ea1d0f03 100644 --- a/libs/filer/files_client.go +++ b/libs/filer/files_client.go @@ -1,7 +1,6 @@ package filer import ( - "bytes" "context" "errors" "fmt" @@ -179,12 +178,12 @@ func (w *FilesClient) Read(ctx context.Context, name string) (io.ReadCloser, err return nil, err } - var buf bytes.Buffer - err = w.apiClient.Do(ctx, http.MethodGet, urlPath, nil, nil, &buf) + var reader io.ReadCloser + err = w.apiClient.Do(ctx, http.MethodGet, urlPath, nil, nil, &reader) // Return early on success. if err == nil { - return io.NopCloser(&buf), nil + return reader, nil } // Special handling of this error only if it is an API error.