From 2addd0c506f61b8c1eed98269931b4c9212d97b4 Mon Sep 17 00:00:00 2001 From: Lennart Kats Date: Fri, 6 Dec 2024 22:12:42 +0100 Subject: [PATCH] Extend catalog/schema preset support --- bundle/config/mutator/apply_presets.go | 113 ++++++++++++++---- bundle/config/mutator/apply_presets_test.go | 50 ++++++++ .../databricks_template_schema.json | 6 +- .../scratch/exploration.ipynb.tmpl | 78 ++++++++++-- .../src/{{.project_name}}/main.py.tmpl | 4 +- 5 files changed, 213 insertions(+), 38 deletions(-) diff --git a/bundle/config/mutator/apply_presets.go b/bundle/config/mutator/apply_presets.go index a68ebfc1c..39d979992 100644 --- a/bundle/config/mutator/apply_presets.go +++ b/bundle/config/mutator/apply_presets.go @@ -3,7 +3,9 @@ package mutator import ( "context" "fmt" + "os" "path" + "regexp" "slices" "sort" "strings" @@ -14,6 +16,7 @@ import ( "github.com/databricks/cli/libs/dbr" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/dyn" + "github.com/databricks/cli/libs/log" "github.com/databricks/cli/libs/textutil" "github.com/databricks/databricks-sdk-go/service/catalog" "github.com/databricks/databricks-sdk-go/service/jobs" @@ -100,7 +103,7 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos } diags = diags.Extend(addCatalogSchemaParameters(b, key, j, t)) - diags = diags.Extend(validateCatalogSchemaUsage(b, key, j)) + diags = diags.Extend(recommendCatalogSchemaUsage(b, ctx, key, j)) } } @@ -333,31 +336,6 @@ func validateCatalogAndSchema(b *bundle.Bundle) diag.Diagnostics { } return nil } -func validateCatalogSchemaUsage(b *bundle.Bundle, key string, job *resources.Job) diag.Diagnostics { - for _, t := range job.Tasks { - if t.NotebookTask != nil { - // TODO: proper validation that warns - return diag.Diagnostics{{ - Summary: fmt.Sprintf("job %s must read catalog and schema from parameters for notebook %s", - key, t.NotebookTask.NotebookPath), - Severity: diag.Recommendation, - Locations: []dyn.Location{b.Config.GetLocation("resources.jobs." + key)}, - }} - } - } - - // TODO: validate that any notebooks actually read the catalog/schema arguments - // if ... { - // return diag.Diagnostics{{ - // Summary: fmt.Sprintf("job %s must pass catalog and schema presets as parameters as follows:\n"+ - // " ...", key), - // Severity: diag.Error, - // Locations: []dyn.Location{b.Config.GetLocation("resources.jobs." + key)}, - // }} - // } - - return nil -} // toTagArray converts a map of tags to an array of tags. // We sort tags so ensure stable ordering. @@ -443,3 +421,86 @@ func addCatalogSchemaParameters(b *bundle.Bundle, key string, job *resources.Job return diags } + +func recommendCatalogSchemaUsage(b *bundle.Bundle, ctx context.Context, key string, job *resources.Job) diag.Diagnostics { + var diags diag.Diagnostics + for _, t := range job.Tasks { + var relPath string + var expected string + var fix string + if t.NotebookTask != nil { + relPath = t.NotebookTask.NotebookPath + expected = `" dbutils.widgets.text(['"]schema|` + + `USE[^)]+schema` + fix = " dbutils.widgets.text('catalog')\n" + + " dbutils.widgets.text('schema')\n" + + " catalog = dbutils.widgets.get('catalog')\n" + + " schema = dbutils.widgets.get('schema')\n" + + " spark.sql(f'USE {catalog}.{schema}')\n" + } else if t.SparkPythonTask != nil { + relPath = t.SparkPythonTask.PythonFile + expected = `add_argument\(['"]--catalog'|` + + `USE[^)]+catalog` + fix = " def main():\n" + + " parser = argparse.ArgumentParser()\n" + + " parser.add_argument('--catalog', required=True)\n" + + " parser.add_argument('--schema', '-s', required=True)\n" + + " args, unknown = parser.parse_known_args()\n" + + " spark.sql(f\"USE {args.catalog}.{args.schema}\")\n" + } else if t.SqlTask != nil && t.SqlTask.File != nil { + relPath = t.SqlTask.File.Path + expected = `:schema|\{\{schema\}\}` + fix = " USE CATALOG {{catalog}};\n" + + " USE IDENTIFIER({schema});\n" + } else { + continue + } + + sourceDir, err := b.Config.GetLocation("resources.jobs." + key).Directory() + if err != nil { + continue + } + + localPath, _, err := GetLocalPath(ctx, b, sourceDir, relPath) + if err != nil { + // Any path errors are reported by another mutator + continue + } + if localPath == "" { + // If there is no local copy we don't want to download it and skip this check + continue + } + + log.Warnf(ctx, "LocalPath: %s, relPath: %s, sourceDir: %s", localPath, relPath, sourceDir) + + if !fileIncludesPattern(ctx, localPath, expected) { + diags = diags.Extend(diag.Diagnostics{{ + Summary: fmt.Sprintf("Use the 'catalog' and 'schema' parameters provided via 'presets.catalog' and 'presets.schema' using\n\n" + + fix), + Severity: diag.Recommendation, + Locations: []dyn.Location{{ + File: localPath, + Line: 1, + Column: 1, + }}, + }}) + } + } + + return diags +} + +func fileIncludesPattern(ctx context.Context, filePath string, expected string) bool { + content, err := os.ReadFile(filePath) + if err != nil { + log.Warnf(ctx, "failed to check file %s: %v", filePath, err) + return true + } + + matched, err := regexp.MatchString(expected, string(content)) + if err != nil { + log.Warnf(ctx, "failed to check pattern in %s: %v", filePath, err) + return true + } + return matched +} diff --git a/bundle/config/mutator/apply_presets_test.go b/bundle/config/mutator/apply_presets_test.go index 497ef051a..eb0eea5de 100644 --- a/bundle/config/mutator/apply_presets_test.go +++ b/bundle/config/mutator/apply_presets_test.go @@ -453,3 +453,53 @@ func TestApplyPresetsSourceLinkedDeployment(t *testing.T) { } } + +// func TestApplyPresetsRecommendsCatalogSchemaUsage(t *testing.T) { +// dir := t.TempDir() + +// ... + +// b := &bundle.Bundle{ +// Config: config.Root{ +// Resources: config.Resources{ +// Jobs: map[string]*resources.Job{ +// "job1": { +// JobSettings: &jobs.JobSettings{ +// Tasks: []jobs.Task{ +// { +// NotebookTask: &jobs.NotebookTask{ +// NotebookPath: notebookPath, +// }, +// }, +// { +// SparkPythonTask: &jobs.SparkPythonTask{ +// PythonFile: pythonPath, +// }, +// }, +// { +// NotebookTask: &jobs.NotebookTask{ +// NotebookPath: "/Workspace/absolute/path/notebook", +// }, +// }, +// }, +// }, +// }, +// }, +// }, +// }, +// } + +// ctx := context.Background() +// diags := bundle.Apply(ctx, b, ApplyPresets()) +// require.Len(t, diags, 2) + +// // Check notebook diagnostic +// assert.Equal(t, notebookPath, diags[0].Locations[0].File) +// assert.Equal(t, 1, diags[0].Locations[0].Line) +// assert.Equal(t, 1, diags[0].Locations[0].Column) + +// // Check Python script diagnostic +// assert.Equal(t, pythonPath, diags[1].Locations[0].File) +// assert.Equal(t, 1, diags[1].Locations[0].Line) +// assert.Equal(t, 1, diags[1].Locations[0].Column) +// } diff --git a/libs/template/templates/default-python/databricks_template_schema.json b/libs/template/templates/default-python/databricks_template_schema.json index 6f457a6ae..92e8316df 100644 --- a/libs/template/templates/default-python/databricks_template_schema.json +++ b/libs/template/templates/default-python/databricks_template_schema.json @@ -13,21 +13,21 @@ "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "\n\nInclude a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'", + "description": "\nWould you like to include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'?", "order": 2 }, "include_dlt": { "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'", + "description": "Would you like to include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'?", "order": 3 }, "include_python": { "type": "string", "default": "yes", "enum": ["yes", "no"], - "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'", + "description": "Would you like to include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'?", "order": 4 }, "default_catalog": { diff --git a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl index 42164dff0..8c0298fec 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl @@ -27,15 +27,24 @@ }, "outputs": [], "source": [ - {{- if (eq .include_python "yes") }} + {{- if (eq .include_python "yes") }} "import sys\n", "sys.path.append('../src')\n", "from {{.project_name}} import main\n", "\n", - "main.get_taxis(spark).show(10)" - {{else}} - "spark.range(10)" - {{end -}} + "catalog = dbutils.widgets.get('catalog')\n", + "schema = dbutils.widgets.get('schema')\n", + "spark.sql(f'USE {catalog}.{schema}')\n", + "\n", + "spark.sql('SELECT * FROM example').show(10)" + {{- else}} + "# Load default catalog and schema as widget and set their values as the default catalog / schema\n", + "catalog = dbutils.widgets.get('catalog')\n", + "schema = dbutils.widgets.get('schema')\n", + "spark.sql(f'USE {catalog}.{schema}')\n", + "\n", + "spark.sql('SELECT * FROM example').show(10)" + {{- end}} ] } ], @@ -46,8 +55,63 @@ "notebookMetadata": { "pythonIndentUnit": 2 }, - "notebookName": "ipynb-notebook", - "widgets": {} + "notebookName": "exploration", + "widgets": { + "catalog": { + "currentValue": "{{.default_catalog}}", + "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f1234", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "{{.default_catalog}}", + "label": null, + "name": "catalog", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "{{.default_catalog}}", + "label": null, + "name": "catalog", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } + }, +{{- $dev_schema := .shared_schema }} +{{- if (regexp "^yes").MatchString .personal_schemas}} + {{- $dev_schema = "{{short_name}}"}} +{{- end}} + "schema": { + "currentValue": "{{$dev_schema}}", + "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f5678", + "typedWidgetInfo": { + "autoCreated": false, + "defaultValue": "{{$dev_schema}}", + "label": null, + "name": "schema", + "options": { + "widgetDisplayType": "Text", + "validationRegex": null + }, + "parameterDataType": "String" + }, + "widgetInfo": { + "widgetType": "text", + "defaultValue": "{{$dev_schema}}", + "label": null, + "name": "schema", + "options": { + "widgetType": "text", + "autoCreated": null, + "validationRegex": null + } + } }, "kernelspec": { "display_name": "Python 3", diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl index 45af6fa86..e79920a9e 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -27,8 +27,8 @@ def main(): # In the default template, these parameters are set # using the 'catalog' and 'schema' presets in databricks.yml. parser = argparse.ArgumentParser() - parser.add_argument('--catalog', '-c', required=True) - parser.add_argument('--schema', '-s', required=True) + parser.add_argument('--catalog', required=True) + parser.add_argument('--schema', required=True) args, unknown = parser.parse_known_args() spark = get_spark() spark.sql(f"USE {args.catalog}.{args.schema}")