From c0a56a93fb5ad472d9ba88c6e4fd1cf14973ec70 Mon Sep 17 00:00:00 2001 From: Denis Bilenko Date: Fri, 14 Feb 2025 12:02:12 +0100 Subject: [PATCH] acc: add a helper to diff with replacements (#2352) ## Changes diff.py is like "diff -r -U2", but it first applies replacements to the contents of both arguments. This allows comparing output files and directories while ignoring differences that are going to be replaced by placeholders anyway. This is useful for tests that record a large number of files, specifically "bundle init" with standard templates. In those tests, changing one parameter results in a small diff, so recording the full directory is not helpful because it's hard to see what changed. I'm using it in the implementation of serverless mode for templates that need it: #2348 The serverless templates are slightly different from the classic ones; capturing the diff helps to see exactly where. Related small changes: - Add a [TESTROOT] replacement for the absolute path to the acceptance directory in the git repo. - Add a $TESTDIR env var holding the absolute path to a given test in the git repo. ## Tests - New test acceptance/selftest/diff to test the helper. - Via #2348, which makes use of this feature. 
--- acceptance/acceptance_test.go | 20 +++++++ acceptance/bin/diff.py | 56 +++++++++++++++++++ acceptance/selftest/diff/out_dir_a/output.txt | 7 +++ acceptance/selftest/diff/out_dir_b/output.txt | 7 +++ acceptance/selftest/diff/output.txt | 13 +++++ acceptance/selftest/diff/script | 17 ++++++ acceptance/selftest/test.toml | 1 + 7 files changed, 121 insertions(+) create mode 100755 acceptance/bin/diff.py create mode 100644 acceptance/selftest/diff/out_dir_a/output.txt create mode 100644 acceptance/selftest/diff/out_dir_b/output.txt create mode 100644 acceptance/selftest/diff/output.txt create mode 100644 acceptance/selftest/diff/script create mode 100644 acceptance/selftest/test.toml diff --git a/acceptance/acceptance_test.go b/acceptance/acceptance_test.go index d99ad2991..c7b1151ab 100644 --- a/acceptance/acceptance_test.go +++ b/acceptance/acceptance_test.go @@ -57,6 +57,8 @@ const ( CleanupScript = "script.cleanup" PrepareScript = "script.prepare" MaxFileSize = 100_000 + // Filename to save replacements to (used by diff.py) + ReplsFile = "repls.json" ) var Scripts = map[string]bool{ @@ -65,6 +67,10 @@ var Scripts = map[string]bool{ PrepareScript: true, } +var Ignored = map[string]bool{ + ReplsFile: true, +} + func TestAccept(t *testing.T) { testAccept(t, InprocessMode, SingleTest) } @@ -152,6 +158,8 @@ func testAccept(t *testing.T, InprocessMode bool, singleTest string) int { testdiff.PrepareReplacementSdkVersion(t, &repls) testdiff.PrepareReplacementsGoVersion(t, &repls) + repls.SetPath(cwd, "[TESTROOT]") + repls.Repls = append(repls.Repls, testdiff.Replacement{Old: regexp.MustCompile("dbapi[0-9a-f]+"), New: "[DATABRICKS_TOKEN]"}) testDirs := getTests(t) @@ -310,6 +318,11 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont // User replacements come last: repls.Repls = append(repls.Repls, config.Repls...) 
+ // Save replacements to temp test directory so that it can be read by diff.py + replsJson, err := json.MarshalIndent(repls.Repls, "", " ") + require.NoError(t, err) + testutil.WriteFile(t, filepath.Join(tmpDir, ReplsFile), string(replsJson)) + if coverDir != "" { // Creating individual coverage directory for each test, because writing to the same one // results in sporadic failures like this one (only if tests are running in parallel): @@ -320,6 +333,10 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont cmd.Env = append(cmd.Env, "GOCOVERDIR="+coverDir) } + absDir, err := filepath.Abs(dir) + require.NoError(t, err) + cmd.Env = append(cmd.Env, "TESTDIR="+absDir) + // Write combined output to a file out, err := os.Create(filepath.Join(tmpDir, "output.txt")) require.NoError(t, err) @@ -368,6 +385,9 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont if _, ok := outputs[relPath]; ok { continue } + if _, ok := Ignored[relPath]; ok { + continue + } unexpected = append(unexpected, relPath) if strings.HasPrefix(relPath, "out") { // We have a new file starting with "out" diff --git a/acceptance/bin/diff.py b/acceptance/bin/diff.py new file mode 100755 index 000000000..0a91d57ce --- /dev/null +++ b/acceptance/bin/diff.py @@ -0,0 +1,56 @@ +#!/usr/bin/env python3 +"""This script implements "diff -r -U2 dir1 dir2" but applies replacements first""" + +import sys +import difflib +import json +import re +from pathlib import Path + + +def replaceAll(patterns, s): + for comp, new in patterns: + s = comp.sub(new, s) + return s + + +def main(): + d1, d2 = sys.argv[1:] + d1, d2 = Path(d1), Path(d2) + + with open("repls.json") as f: + repls = json.load(f) + + patterns = [] + for r in repls: + try: + c = re.compile(r["Old"]) + patterns.append((c, r["New"])) + except re.error as e: + print(f"Regex error for pattern {r}: {e}", file=sys.stderr) + + files1 = [str(p.relative_to(d1)) for p in d1.rglob("*") if p.is_file()] + 
files2 = [str(p.relative_to(d2)) for p in d2.rglob("*") if p.is_file()] + + set1 = set(files1) + set2 = set(files2) + + for f in sorted(set1 | set2): + p1 = d1 / f + p2 = d2 / f + if f not in set2: + print(f"Only in {d1}: {f}") + elif f not in set1: + print(f"Only in {d2}: {f}") + else: + a = [replaceAll(patterns, x) for x in p1.read_text().splitlines(True)] + b = [replaceAll(patterns, x) for x in p2.read_text().splitlines(True)] + if a != b: + p1_str = p1.as_posix() + p2_str = p2.as_posix() + for line in difflib.unified_diff(a, b, p1_str, p2_str, "", "", 2): + print(line, end="") + + +if __name__ == "__main__": + main() diff --git a/acceptance/selftest/diff/out_dir_a/output.txt b/acceptance/selftest/diff/out_dir_a/output.txt new file mode 100644 index 000000000..303c1867b --- /dev/null +++ b/acceptance/selftest/diff/out_dir_a/output.txt @@ -0,0 +1,7 @@ +Hello! +{ + "id": "[USERID]", + "userName": "[USERNAME]" +} + +Footer \ No newline at end of file diff --git a/acceptance/selftest/diff/out_dir_b/output.txt b/acceptance/selftest/diff/out_dir_b/output.txt new file mode 100644 index 000000000..f4f01af13 --- /dev/null +++ b/acceptance/selftest/diff/out_dir_b/output.txt @@ -0,0 +1,7 @@ +Hello! +{ + "id": "[UUID]", + "userName": "[USERNAME]" +} + +Footer \ No newline at end of file diff --git a/acceptance/selftest/diff/output.txt b/acceptance/selftest/diff/output.txt new file mode 100644 index 000000000..aef99f1e3 --- /dev/null +++ b/acceptance/selftest/diff/output.txt @@ -0,0 +1,13 @@ + +>>> diff.py out_dir_a out_dir_b +Only in out_dir_a: only_in_a +Only in out_dir_b: only_in_b +--- out_dir_a/output.txt ++++ out_dir_b/output.txt +@@ -1,5 +1,5 @@ + Hello! 
+ { +- "id": "[USERID]", ++ "id": "[UUID]", + "userName": "[USERNAME]" + } diff --git a/acceptance/selftest/diff/script b/acceptance/selftest/diff/script new file mode 100644 index 000000000..a7b8706e6 --- /dev/null +++ b/acceptance/selftest/diff/script @@ -0,0 +1,17 @@ +mkdir out_dir_a +mkdir out_dir_b + +touch out_dir_a/only_in_a +touch out_dir_b/only_in_b + +echo Hello! >> out_dir_a/output.txt +echo Hello! >> out_dir_b/output.txt + +curl -s $DATABRICKS_HOST/api/2.0/preview/scim/v2/Me >> out_dir_a/output.txt +printf "\n\nFooter" >> out_dir_a/output.txt +printf '{\n "id": "7d639bad-ac6d-4e6f-abd7-9522a86b0239",\n "userName": "[USERNAME]"\n}\n\nFooter' >> out_dir_b/output.txt + +# Unlike regular diff, diff.py will apply replacements first before doing the comparison +errcode trace diff.py out_dir_a out_dir_b + +rm out_dir_a/only_in_a out_dir_b/only_in_b diff --git a/acceptance/selftest/test.toml b/acceptance/selftest/test.toml new file mode 100644 index 000000000..b76e712fb --- /dev/null +++ b/acceptance/selftest/test.toml @@ -0,0 +1 @@ +LocalOnly = true