acc: add a helper to diff with replacements (#2352)

## Changes

diff.py is like "diff -r -U2", but it first applies the replacements to
the contents of both arguments.

This allows comparing different output files and directories while ignoring
differences that are going to be replaced by placeholders.

This is useful for tests that record a large number of files, specifically
"bundle init" with standard templates. In those tests, changing one
parameter results in a small diff, so recording the full directory is not
helpful, because it's hard to see what changed there. I'm using it in the
implementation of serverless mode for templates that need it: #2348. The
serverless templates are slightly different from classic ones; capturing the
diff helps to see exactly where.

Related small changes:
- Add [TESTROOT] replacement for absolute path to acceptance directory
in git repo.
- Add $TESTDIR env var for absolute path to a given test in git repo.

## Tests
- New test acceptance/selftest/diff to test the helper.
- Via #2348 which makes use of this feature.
This commit is contained in:
Denis Bilenko 2025-02-14 12:02:12 +01:00 committed by GitHub
parent 2d09636611
commit c0a56a93fb
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 121 additions and 0 deletions

View File

@ -57,6 +57,8 @@ const (
CleanupScript = "script.cleanup"
PrepareScript = "script.prepare"
MaxFileSize = 100_000
// Filename to save replacements to (used by diff.py)
ReplsFile = "repls.json"
)
var Scripts = map[string]bool{
@ -65,6 +67,10 @@ var Scripts = map[string]bool{
PrepareScript: true,
}
// Ignored lists file names that are skipped when runTest collects unexpected
// files (currently only the replacements dump that diff.py reads).
var Ignored = map[string]bool{ReplsFile: true}
// TestAccept is the `go test` entry point; it delegates to testAccept using
// the package-level InprocessMode and SingleTest settings.
func TestAccept(t *testing.T) {
	// The int returned by testAccept is intentionally unused here.
	_ = testAccept(t, InprocessMode, SingleTest)
}
@ -152,6 +158,8 @@ func testAccept(t *testing.T, InprocessMode bool, singleTest string) int {
testdiff.PrepareReplacementSdkVersion(t, &repls)
testdiff.PrepareReplacementsGoVersion(t, &repls)
repls.SetPath(cwd, "[TESTROOT]")
repls.Repls = append(repls.Repls, testdiff.Replacement{Old: regexp.MustCompile("dbapi[0-9a-f]+"), New: "[DATABRICKS_TOKEN]"})
testDirs := getTests(t)
@ -310,6 +318,11 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
// User replacements come last:
repls.Repls = append(repls.Repls, config.Repls...)
// Save replacements to temp test directory so that it can be read by diff.py
replsJson, err := json.MarshalIndent(repls.Repls, "", " ")
require.NoError(t, err)
testutil.WriteFile(t, filepath.Join(tmpDir, ReplsFile), string(replsJson))
if coverDir != "" {
// Creating individual coverage directory for each test, because writing to the same one
// results in sporadic failures like this one (only if tests are running in parallel):
@ -320,6 +333,10 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
cmd.Env = append(cmd.Env, "GOCOVERDIR="+coverDir)
}
absDir, err := filepath.Abs(dir)
require.NoError(t, err)
cmd.Env = append(cmd.Env, "TESTDIR="+absDir)
// Write combined output to a file
out, err := os.Create(filepath.Join(tmpDir, "output.txt"))
require.NoError(t, err)
@ -368,6 +385,9 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
if _, ok := outputs[relPath]; ok {
continue
}
if _, ok := Ignored[relPath]; ok {
continue
}
unexpected = append(unexpected, relPath)
if strings.HasPrefix(relPath, "out") {
// We have a new file starting with "out"

56
acceptance/bin/diff.py Executable file
View File

@ -0,0 +1,56 @@
#!/usr/bin/env python3
"""This script implements "diff -r -U2 dir1 dir2" but applies replacements first"""
import sys
import difflib
import json
import re
from pathlib import Path
def replaceAll(patterns, s):
    """Apply every (compiled_regex, replacement) pair in `patterns` to `s`, in order.

    Each substitution operates on the result of the previous one, so the order
    of `patterns` matters. `replacement` is handed to `Pattern.sub` as-is,
    which means backslash escapes such as `\\1` are interpreted as a Python
    replacement template (a literal `$1` is not expanded).

    Returns the fully substituted string; `s` is returned unchanged when
    `patterns` is empty.
    """
    for comp, new in patterns:
        s = comp.sub(new, s)
    return s
def main():
    """Print a recursive unified diff of two directories after applying replacements.

    Expects exactly two command-line arguments (the directories to compare)
    and a "repls.json" file in the current working directory that holds a list
    of objects with an "Old" key (regex source) and a "New" key (replacement).

    For every file found recursively under either directory:
    - if it exists on one side only, "Only in DIR: FILE" is printed;
    - otherwise all replacements are applied to each line of both files and,
      if the results differ, a unified diff with two lines of context is
      printed.

    Exits with a usage message when the argument count is wrong.
    """
    if len(sys.argv) != 3:
        sys.exit("usage: diff.py DIR1 DIR2")
    d1, d2 = Path(sys.argv[1]), Path(sys.argv[2])

    with open("repls.json") as f:
        repls = json.load(f)

    patterns = []
    for r in repls:
        try:
            c = re.compile(r["Old"])
            patterns.append((c, r["New"]))
        except re.error as e:
            # NOTE(review): the patterns are presumably authored for another
            # regex engine; ones Python's `re` rejects are reported and skipped.
            print(f"Regex error for pattern {r}: {e}", file=sys.stderr)

    # Use forward slashes for relative names so the output does not depend on
    # the OS path separator (the diff headers below already use as_posix()).
    set1 = {p.relative_to(d1).as_posix() for p in d1.rglob("*") if p.is_file()}
    set2 = {p.relative_to(d2).as_posix() for p in d2.rglob("*") if p.is_file()}

    for f in sorted(set1 | set2):
        p1 = d1 / f
        p2 = d2 / f
        if f not in set2:
            print(f"Only in {d1.as_posix()}: {f}")
        elif f not in set1:
            print(f"Only in {d2.as_posix()}: {f}")
        else:
            # Replacements are applied line by line; line endings are kept so
            # that a difference in the final newline is still detected.
            a = [replaceAll(patterns, x) for x in p1.read_text().splitlines(True)]
            b = [replaceAll(patterns, x) for x in p2.read_text().splitlines(True)]
            if a != b:
                p1_str = p1.as_posix()
                p2_str = p2.as_posix()
                for line in difflib.unified_diff(a, b, p1_str, p2_str, "", "", 2):
                    if line.endswith("\n"):
                        print(line, end="")
                    else:
                        # The last line of a file may lack a newline. Terminate
                        # it so it does not fuse with the next output line, and
                        # mark it the way diff(1) does.
                        print(line)
                        print("\\ No newline at end of file")
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View File

@ -0,0 +1,7 @@
Hello!
{
"id": "[USERID]",
"userName": "[USERNAME]"
}
Footer

View File

@ -0,0 +1,7 @@
Hello!
{
"id": "[UUID]",
"userName": "[USERNAME]"
}
Footer

View File

@ -0,0 +1,13 @@
>>> diff.py out_dir_a out_dir_b
Only in out_dir_a: only_in_a
Only in out_dir_b: only_in_b
--- out_dir_a/output.txt
+++ out_dir_b/output.txt
@@ -1,5 +1,5 @@
Hello!
{
- "id": "[USERID]",
+ "id": "[UUID]",
"userName": "[USERNAME]"
}

View File

@ -0,0 +1,17 @@
# Build two directories that differ in one file each and in the "id" field of output.txt.
mkdir out_dir_a
mkdir out_dir_b
touch out_dir_a/only_in_a
touch out_dir_b/only_in_b
echo Hello! >> out_dir_a/output.txt
echo Hello! >> out_dir_b/output.txt
# Quote the URL so that the $DATABRICKS_HOST expansion is not word-split or glob-expanded.
curl -s "$DATABRICKS_HOST/api/2.0/preview/scim/v2/Me" >> out_dir_a/output.txt
printf "\n\nFooter" >> out_dir_a/output.txt
printf '{\n "id": "7d639bad-ac6d-4e6f-abd7-9522a86b0239",\n "userName": "[USERNAME]"\n}\n\nFooter' >> out_dir_b/output.txt
# Unlike regular diff, diff.py will apply replacements first before doing the comparison
errcode trace diff.py out_dir_a out_dir_b
rm out_dir_a/only_in_a out_dir_b/only_in_b

View File

@ -0,0 +1 @@
LocalOnly = true