Extend catalog/schema preset support

Lennart Kats 2024-12-06 22:12:42 +01:00
parent 963f5e6636
commit 2addd0c506
No known key found for this signature in database
GPG Key ID: 1EB8B57673197023
5 changed files with 213 additions and 38 deletions

View File

@@ -3,7 +3,9 @@ package mutator
 import (
     "context"
     "fmt"
+    "os"
     "path"
+    "regexp"
     "slices"
     "sort"
     "strings"
@@ -14,6 +16,7 @@ import (
     "github.com/databricks/cli/libs/dbr"
     "github.com/databricks/cli/libs/diag"
     "github.com/databricks/cli/libs/dyn"
+    "github.com/databricks/cli/libs/log"
     "github.com/databricks/cli/libs/textutil"
     "github.com/databricks/databricks-sdk-go/service/catalog"
     "github.com/databricks/databricks-sdk-go/service/jobs"
@@ -100,7 +103,7 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
             }
             diags = diags.Extend(addCatalogSchemaParameters(b, key, j, t))
-            diags = diags.Extend(validateCatalogSchemaUsage(b, key, j))
+            diags = diags.Extend(recommendCatalogSchemaUsage(b, ctx, key, j))
         }
     }
@@ -333,31 +336,6 @@ func validateCatalogAndSchema(b *bundle.Bundle) diag.Diagnostics {
     }
     return nil
 }
-
-func validateCatalogSchemaUsage(b *bundle.Bundle, key string, job *resources.Job) diag.Diagnostics {
-    for _, t := range job.Tasks {
-        if t.NotebookTask != nil {
-            // TODO: proper validation that warns
-            return diag.Diagnostics{{
-                Summary: fmt.Sprintf("job %s must read catalog and schema from parameters for notebook %s",
-                    key, t.NotebookTask.NotebookPath),
-                Severity:  diag.Recommendation,
-                Locations: []dyn.Location{b.Config.GetLocation("resources.jobs." + key)},
-            }}
-        }
-    }
-
-    // TODO: validate that any notebooks actually read the catalog/schema arguments
-    // if ... {
-    //     return diag.Diagnostics{{
-    //         Summary: fmt.Sprintf("job %s must pass catalog and schema presets as parameters as follows:\n"+
-    //             "  ...", key),
-    //         Severity:  diag.Error,
-    //         Locations: []dyn.Location{b.Config.GetLocation("resources.jobs." + key)},
-    //     }}
-    // }
-    return nil
-}
 
 // toTagArray converts a map of tags to an array of tags.
 // We sort tags so ensure stable ordering.
@@ -443,3 +421,86 @@ func addCatalogSchemaParameters(b *bundle.Bundle, key string, job *resources.Job
     return diags
 }
+
+func recommendCatalogSchemaUsage(b *bundle.Bundle, ctx context.Context, key string, job *resources.Job) diag.Diagnostics {
+    var diags diag.Diagnostics
+    for _, t := range job.Tasks {
+        var relPath string
+        var expected string
+        var fix string
+        if t.NotebookTask != nil {
+            relPath = t.NotebookTask.NotebookPath
+            expected = `dbutils.widgets.text\(['"]schema|` +
+                `USE[^)]+schema`
+            fix = "  dbutils.widgets.text('catalog')\n" +
+                "  dbutils.widgets.text('schema')\n" +
+                "  catalog = dbutils.widgets.get('catalog')\n" +
+                "  schema = dbutils.widgets.get('schema')\n" +
+                "  spark.sql(f'USE {catalog}.{schema}')\n"
+        } else if t.SparkPythonTask != nil {
+            relPath = t.SparkPythonTask.PythonFile
+            expected = `add_argument\(['"]--catalog'|` +
+                `USE[^)]+catalog`
+            fix = "  def main():\n" +
+                "    parser = argparse.ArgumentParser()\n" +
+                "    parser.add_argument('--catalog', required=True)\n" +
+                "    parser.add_argument('--schema', '-s', required=True)\n" +
+                "    args, unknown = parser.parse_known_args()\n" +
+                "    spark.sql(f\"USE {args.catalog}.{args.schema}\")\n"
+        } else if t.SqlTask != nil && t.SqlTask.File != nil {
+            relPath = t.SqlTask.File.Path
+            expected = `:schema|\{\{schema\}\}`
+            fix = "  USE CATALOG {{catalog}};\n" +
+                "  USE IDENTIFIER({schema});\n"
+        } else {
+            continue
+        }
+
+        sourceDir, err := b.Config.GetLocation("resources.jobs." + key).Directory()
+        if err != nil {
+            continue
+        }
+
+        localPath, _, err := GetLocalPath(ctx, b, sourceDir, relPath)
+        if err != nil {
+            // Any path errors are reported by another mutator
+            continue
+        }
+
+        if localPath == "" {
+            // If there is no local copy we don't want to download it and skip this check
+            continue
+        }
+        log.Warnf(ctx, "LocalPath: %s, relPath: %s, sourceDir: %s", localPath, relPath, sourceDir)
+
+        if !fileIncludesPattern(ctx, localPath, expected) {
+            diags = diags.Extend(diag.Diagnostics{{
+                Summary: fmt.Sprintf("Use the 'catalog' and 'schema' parameters provided via 'presets.catalog' and 'presets.schema' using\n\n" +
+                    fix),
+                Severity: diag.Recommendation,
+                Locations: []dyn.Location{{
+                    File:   localPath,
+                    Line:   1,
+                    Column: 1,
+                }},
+            }})
+        }
+    }
+    return diags
+}
+
+func fileIncludesPattern(ctx context.Context, filePath string, expected string) bool {
+    content, err := os.ReadFile(filePath)
+    if err != nil {
+        log.Warnf(ctx, "failed to check file %s: %v", filePath, err)
+        return true
+    }
+
+    matched, err := regexp.MatchString(expected, string(content))
+    if err != nil {
+        log.Warnf(ctx, "failed to check pattern in %s: %v", filePath, err)
+        return true
+    }
+    return matched
+}
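
The new check boils down to a regexp scan over the task's local source file. Below is a minimal standalone sketch of that scan outside the bundle machinery; the file name and the recommendation text are illustrative assumptions, and the pattern is the notebook-task pattern as reconstructed above.

    package main

    import (
        "fmt"
        "os"
        "regexp"
    )

    func main() {
        // Hypothetical local copy of a notebook task's source.
        content, err := os.ReadFile("notebook.py")
        if err != nil {
            fmt.Println("could not read file, skipping check:", err)
            return
        }

        // Same idea as the 'expected' pattern for notebook tasks above.
        pattern := `dbutils.widgets.text\(['"]schema|USE[^)]+schema`
        matched, err := regexp.MatchString(pattern, string(content))
        if err != nil {
            fmt.Println("invalid pattern:", err)
            return
        }
        if !matched {
            fmt.Println("recommendation: read the 'catalog' and 'schema' widgets and run USE {catalog}.{schema}")
        }
    }

Returning true on read or compile errors, as fileIncludesPattern does, means the recommendation is suppressed rather than emitted spuriously when the file cannot be inspected.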

View File

@@ -453,3 +453,53 @@ func TestApplyPresetsSourceLinkedDeployment(t *testing.T) {
     }
 }
+
+// func TestApplyPresetsRecommendsCatalogSchemaUsage(t *testing.T) {
+//     dir := t.TempDir()
+//
+//     ...
+//
+//     b := &bundle.Bundle{
+//         Config: config.Root{
+//             Resources: config.Resources{
+//                 Jobs: map[string]*resources.Job{
+//                     "job1": {
+//                         JobSettings: &jobs.JobSettings{
+//                             Tasks: []jobs.Task{
+//                                 {
+//                                     NotebookTask: &jobs.NotebookTask{
+//                                         NotebookPath: notebookPath,
+//                                     },
+//                                 },
+//                                 {
+//                                     SparkPythonTask: &jobs.SparkPythonTask{
+//                                         PythonFile: pythonPath,
+//                                     },
+//                                 },
+//                                 {
+//                                     NotebookTask: &jobs.NotebookTask{
+//                                         NotebookPath: "/Workspace/absolute/path/notebook",
+//                                     },
+//                                 },
+//                             },
+//                         },
+//                     },
+//                 },
+//             },
+//         },
+//     }
+//
+//     ctx := context.Background()
+//     diags := bundle.Apply(ctx, b, ApplyPresets())
+//     require.Len(t, diags, 2)
+//
+//     // Check notebook diagnostic
+//     assert.Equal(t, notebookPath, diags[0].Locations[0].File)
+//     assert.Equal(t, 1, diags[0].Locations[0].Line)
+//     assert.Equal(t, 1, diags[0].Locations[0].Column)
+//
+//     // Check Python script diagnostic
+//     assert.Equal(t, pythonPath, diags[1].Locations[0].File)
+//     assert.Equal(t, 1, diags[1].Locations[0].Line)
+//     assert.Equal(t, 1, diags[1].Locations[0].Column)
+// }
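
Before the commented-out test above can be enabled, it needs local fixture files behind notebookPath and pythonPath. One possible setup is sketched below; the test name, file names, and file contents are assumptions, not part of the commit.

    package mutator_test

    import (
        "os"
        "path/filepath"
        "testing"

        "github.com/stretchr/testify/require"
    )

    func TestApplyPresetsCatalogSchemaFixtures(t *testing.T) {
        dir := t.TempDir()

        // A notebook that does NOT read the catalog/schema widgets,
        // so the notebook task should trigger a recommendation.
        notebookPath := filepath.Join(dir, "notebook.py")
        require.NoError(t, os.WriteFile(notebookPath, []byte("# Databricks notebook source\nprint('hello')\n"), 0o644))

        // A Python script without the argparse --catalog/--schema handling.
        pythonPath := filepath.Join(dir, "main.py")
        require.NoError(t, os.WriteFile(pythonPath, []byte("print('hello')\n"), 0o644))

        _ = notebookPath
        _ = pythonPath
        // From here, build the bundle as in the commented-out test above
        // and run bundle.Apply(ctx, b, ApplyPresets()).
    }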

View File

@@ -13,21 +13,21 @@
       "type": "string",
       "default": "yes",
       "enum": ["yes", "no"],
-      "description": "\n\nInclude a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'",
+      "description": "\nWould you like to include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'?",
       "order": 2
     },
     "include_dlt": {
       "type": "string",
       "default": "yes",
       "enum": ["yes", "no"],
-      "description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'",
+      "description": "Would you like to include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'?",
       "order": 3
     },
     "include_python": {
       "type": "string",
       "default": "yes",
       "enum": ["yes", "no"],
-      "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
+      "description": "Would you like to include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'?",
       "order": 4
     },
     "default_catalog": {

View File

@@ -32,10 +32,19 @@
     "sys.path.append('../src')\n",
     "from {{.project_name}} import main\n",
     "\n",
-    "main.get_taxis(spark).show(10)"
-{{else}}
-    "spark.range(10)"
-{{end -}}
+    "catalog = dbutils.widgets.get('catalog')\n",
+    "schema = dbutils.widgets.get('schema')\n",
+    "spark.sql(f'USE {catalog}.{schema}')\n",
+    "\n",
+    "spark.sql('SELECT * FROM example').show(10)"
+{{- else}}
+    "# Load default catalog and schema as widget and set their values as the default catalog / schema\n",
+    "catalog = dbutils.widgets.get('catalog')\n",
+    "schema = dbutils.widgets.get('schema')\n",
+    "spark.sql(f'USE {catalog}.{schema}')\n",
+    "\n",
+    "spark.sql('SELECT * FROM example').show(10)"
+{{- end}}
    ]
   }
  ],
@@ -46,8 +55,65 @@
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
-  "notebookName": "ipynb-notebook",
-  "widgets": {}
+  "notebookName": "exploration",
+  "widgets": {
+   "catalog": {
+    "currentValue": "{{.default_catalog}}",
+    "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f1234",
+    "typedWidgetInfo": {
+     "autoCreated": false,
+     "defaultValue": "{{.default_catalog}}",
+     "label": null,
+     "name": "catalog",
+     "options": {
+      "widgetDisplayType": "Text",
+      "validationRegex": null
+     },
+     "parameterDataType": "String"
+    },
+    "widgetInfo": {
+     "widgetType": "text",
+     "defaultValue": "{{.default_catalog}}",
+     "label": null,
+     "name": "catalog",
+     "options": {
+      "widgetType": "text",
+      "autoCreated": null,
+      "validationRegex": null
+     }
+    }
+   },
+{{- $dev_schema := .shared_schema }}
+{{- if (regexp "^yes").MatchString .personal_schemas}}
+{{- $dev_schema = "{{short_name}}"}}
+{{- end}}
+   "schema": {
+    "currentValue": "{{$dev_schema}}",
+    "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f5678",
+    "typedWidgetInfo": {
+     "autoCreated": false,
+     "defaultValue": "{{$dev_schema}}",
+     "label": null,
+     "name": "schema",
+     "options": {
+      "widgetDisplayType": "Text",
+      "validationRegex": null
+     },
+     "parameterDataType": "String"
+    },
+    "widgetInfo": {
+     "widgetType": "text",
+     "defaultValue": "{{$dev_schema}}",
+     "label": null,
+     "name": "schema",
+     "options": {
+      "widgetType": "text",
+      "autoCreated": null,
+      "validationRegex": null
+     }
+    }
+   }
+  }
  },
  "kernelspec": {
   "display_name": "Python 3",

View File

@@ -27,8 +27,8 @@ def main():
     # In the default template, these parameters are set
     # using the 'catalog' and 'schema' presets in databricks.yml.
     parser = argparse.ArgumentParser()
-    parser.add_argument('--catalog', '-c', required=True)
-    parser.add_argument('--schema', '-s', required=True)
+    parser.add_argument('--catalog', required=True)
+    parser.add_argument('--schema', required=True)
     args, unknown = parser.parse_known_args()
     spark = get_spark()
     spark.sql(f"USE {args.catalog}.{args.schema}")