acc: Avoid reading and applying replacements on large files; validate utf8 (#2244)

## Changes
- Do not start replacement / comparison if file is too large or not
valid utf-8.
- This helps to prevent replacements if there is accidentally a large
binary (e.g. terraform).

## Tests
Found this problem when working on
https://github.com/databricks/cli/pull/2242 -- the tests tried to
applied replacements on terraform binary and crashed. With this change,
an error is reported instead.
This commit is contained in:
Denis Bilenko 2025-01-28 11:22:29 +01:00 committed by GitHub
parent 60709e3d48
commit 11436faafe
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 35 additions and 16 deletions

View File

@ -15,6 +15,7 @@ import (
"strings" "strings"
"testing" "testing"
"time" "time"
"unicode/utf8"
"github.com/databricks/cli/internal/testutil" "github.com/databricks/cli/internal/testutil"
"github.com/databricks/cli/libs/env" "github.com/databricks/cli/libs/env"
@ -44,6 +45,7 @@ const (
EntryPointScript = "script" EntryPointScript = "script"
CleanupScript = "script.cleanup" CleanupScript = "script.cleanup"
PrepareScript = "script.prepare" PrepareScript = "script.prepare"
MaxFileSize = 100_000
) )
var Scripts = map[string]bool{ var Scripts = map[string]bool{
@ -257,15 +259,15 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
func doComparison(t *testing.T, repls testdiff.ReplacementsContext, dirRef, dirNew, relPath string, printedRepls *bool) { func doComparison(t *testing.T, repls testdiff.ReplacementsContext, dirRef, dirNew, relPath string, printedRepls *bool) {
pathRef := filepath.Join(dirRef, relPath) pathRef := filepath.Join(dirRef, relPath)
pathNew := filepath.Join(dirNew, relPath) pathNew := filepath.Join(dirNew, relPath)
bufRef, okRef := readIfExists(t, pathRef) bufRef, okRef := tryReading(t, pathRef)
bufNew, okNew := readIfExists(t, pathNew) bufNew, okNew := tryReading(t, pathNew)
if !okRef && !okNew { if !okRef && !okNew {
t.Errorf("Both files are missing: %s, %s", pathRef, pathNew) t.Errorf("Both files are missing or have errors: %s, %s", pathRef, pathNew)
return return
} }
valueRef := testdiff.NormalizeNewlines(string(bufRef)) valueRef := testdiff.NormalizeNewlines(bufRef)
valueNew := testdiff.NormalizeNewlines(string(bufNew)) valueNew := testdiff.NormalizeNewlines(bufNew)
// Apply replacements to the new value only. // Apply replacements to the new value only.
// The reference value is stored after applying replacements. // The reference value is stored after applying replacements.
@ -323,14 +325,14 @@ func readMergedScriptContents(t *testing.T, dir string) string {
cleanups := []string{} cleanups := []string{}
for { for {
x, ok := readIfExists(t, filepath.Join(dir, CleanupScript)) x, ok := tryReading(t, filepath.Join(dir, CleanupScript))
if ok { if ok {
cleanups = append(cleanups, string(x)) cleanups = append(cleanups, x)
} }
x, ok = readIfExists(t, filepath.Join(dir, PrepareScript)) x, ok = tryReading(t, filepath.Join(dir, PrepareScript))
if ok { if ok {
prepares = append(prepares, string(x)) prepares = append(prepares, x)
} }
if dir == "" || dir == "." { if dir == "" || dir == "." {
@ -417,16 +419,33 @@ func formatOutput(w io.Writer, err error) {
} }
} }
func readIfExists(t *testing.T, path string) ([]byte, bool) { func tryReading(t *testing.T, path string) (string, bool) {
data, err := os.ReadFile(path) info, err := os.Stat(path)
if err == nil { if err != nil {
return data, true if !errors.Is(err, os.ErrNotExist) {
t.Errorf("%s: %s", path, err)
}
return "", false
} }
if !errors.Is(err, os.ErrNotExist) { if info.Size() > MaxFileSize {
t.Fatalf("%s: %s", path, err) t.Errorf("%s: ignoring, too large: %d", path, info.Size())
return "", false
} }
return []byte{}, false
data, err := os.ReadFile(path)
if err != nil {
// already checked ErrNotExist above
t.Errorf("%s: %s", path, err)
return "", false
}
if !utf8.Valid(data) {
t.Errorf("%s: not valid utf-8", path)
return "", false
}
return string(data), true
} }
func CopyDir(src, dst string, inputs, outputs map[string]bool) error { func CopyDir(src, dst string, inputs, outputs map[string]bool) error {