Extend catalog/schema preset support

Lennart Kats 2024-12-06 22:12:42 +01:00
parent 963f5e6636
commit 2addd0c506
No known key found for this signature in database
GPG Key ID: 1EB8B57673197023
5 changed files with 213 additions and 38 deletions

View File

@@ -3,7 +3,9 @@ package mutator
import (
"context"
"fmt"
"os"
"path"
"regexp"
"slices"
"sort"
"strings"
@@ -14,6 +16,7 @@ import (
"github.com/databricks/cli/libs/dbr"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/cli/libs/log"
"github.com/databricks/cli/libs/textutil"
"github.com/databricks/databricks-sdk-go/service/catalog"
"github.com/databricks/databricks-sdk-go/service/jobs"
@@ -100,7 +103,7 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos
}
diags = diags.Extend(addCatalogSchemaParameters(b, key, j, t))
diags = diags.Extend(validateCatalogSchemaUsage(b, key, j))
diags = diags.Extend(recommendCatalogSchemaUsage(ctx, b, key, j))
}
}
@@ -333,31 +336,6 @@ func validateCatalogAndSchema(b *bundle.Bundle) diag.Diagnostics {
}
return nil
}
func validateCatalogSchemaUsage(b *bundle.Bundle, key string, job *resources.Job) diag.Diagnostics {
for _, t := range job.Tasks {
if t.NotebookTask != nil {
// TODO: proper validation that warns
return diag.Diagnostics{{
Summary: fmt.Sprintf("job %s must read catalog and schema from parameters for notebook %s",
key, t.NotebookTask.NotebookPath),
Severity: diag.Recommendation,
Locations: []dyn.Location{b.Config.GetLocation("resources.jobs." + key)},
}}
}
}
// TODO: validate that any notebooks actually read the catalog/schema arguments
// if ... {
// return diag.Diagnostics{{
// Summary: fmt.Sprintf("job %s must pass catalog and schema presets as parameters as follows:\n"+
// " ...", key),
// Severity: diag.Error,
// Locations: []dyn.Location{b.Config.GetLocation("resources.jobs." + key)},
// }}
// }
return nil
}
// toTagArray converts a map of tags to an array of tags.
// We sort tags to ensure stable ordering.
@@ -443,3 +421,86 @@ func addCatalogSchemaParameters(b *bundle.Bundle, key string, job *resources.Job
return diags
}
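// recommendCatalogSchemaUsage checks the source files referenced by a job's tasks
// and emits a recommendation for each file that does not appear to read the
// 'catalog' and 'schema' parameters added based on presets.catalog and presets.schema.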
func recommendCatalogSchemaUsage(ctx context.Context, b *bundle.Bundle, key string, job *resources.Job) diag.Diagnostics {
var diags diag.Diagnostics
for _, t := range job.Tasks {
var relPath string
var expected string
var fix string
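// For each supported task type, 'expected' is a pattern that indicates the file
// already reads the catalog/schema parameters, and 'fix' is the snippet to
// recommend when it does not.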
if t.NotebookTask != nil {
relPath = t.NotebookTask.NotebookPath
expected = `dbutils\.widgets\.text\(['"]schema|` +
`USE[^)]+schema`
fix = " dbutils.widgets.text('catalog')\n" +
" dbutils.widgets.text('schema')\n" +
" catalog = dbutils.widgets.get('catalog')\n" +
" schema = dbutils.widgets.get('schema')\n" +
" spark.sql(f'USE {catalog}.{schema}')\n"
} else if t.SparkPythonTask != nil {
relPath = t.SparkPythonTask.PythonFile
expected = `add_argument\(['"]--catalog['"]|` +
`USE[^)]+catalog`
fix = " def main():\n" +
" parser = argparse.ArgumentParser()\n" +
" parser.add_argument('--catalog', required=True)\n" +
" parser.add_argument('--schema', required=True)\n" +
" args, unknown = parser.parse_known_args()\n" +
" spark.sql(f\"USE {args.catalog}.{args.schema}\")\n"
} else if t.SqlTask != nil && t.SqlTask.File != nil {
relPath = t.SqlTask.File.Path
expected = `:schema|\{\{schema\}\}`
fix = " USE CATALOG {{catalog}};\n" +
" USE IDENTIFIER({{schema}});\n"
} else {
continue
}
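// Resolve the task path relative to the directory where the job is defined,
// so the local copy of the file can be inspected.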
sourceDir, err := b.Config.GetLocation("resources.jobs." + key).Directory()
if err != nil {
continue
}
localPath, _, err := GetLocalPath(ctx, b, sourceDir, relPath)
if err != nil {
// Any path errors are reported by another mutator
continue
}
if localPath == "" {
// If there is no local copy, skip this check rather than downloading the file
continue
}
log.Debugf(ctx, "LocalPath: %s, relPath: %s, sourceDir: %s", localPath, relPath, sourceDir)
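// Recommend the fix snippet when the file does not reference the parameters.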
if !fileIncludesPattern(ctx, localPath, expected) {
diags = diags.Extend(diag.Diagnostics{{
Summary: fmt.Sprintf("Use the 'catalog' and 'schema' parameters provided via 'presets.catalog' and 'presets.schema', for example:\n\n%s",
fix),
Severity: diag.Recommendation,
Locations: []dyn.Location{{
File: localPath,
Line: 1,
Column: 1,
}},
}})
}
}
return diags
}
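// fileIncludesPattern reports whether the contents of filePath match the given
// regular expression. Errors are logged and treated as a match so that no
// recommendation is emitted for a file that cannot be checked.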
func fileIncludesPattern(ctx context.Context, filePath string, expected string) bool {
content, err := os.ReadFile(filePath)
if err != nil {
log.Warnf(ctx, "failed to check file %s: %v", filePath, err)
return true
}
matched, err := regexp.MatchString(expected, string(content))
if err != nil {
log.Warnf(ctx, "failed to check pattern in %s: %v", filePath, err)
return true
}
return matched
}

View File

@@ -453,3 +453,53 @@ func TestApplyPresetsSourceLinkedDeployment(t *testing.T) {
}
}
// func TestApplyPresetsRecommendsCatalogSchemaUsage(t *testing.T) {
// dir := t.TempDir()
// ...
// b := &bundle.Bundle{
// Config: config.Root{
// Resources: config.Resources{
// Jobs: map[string]*resources.Job{
// "job1": {
// JobSettings: &jobs.JobSettings{
// Tasks: []jobs.Task{
// {
// NotebookTask: &jobs.NotebookTask{
// NotebookPath: notebookPath,
// },
// },
// {
// SparkPythonTask: &jobs.SparkPythonTask{
// PythonFile: pythonPath,
// },
// },
// {
// NotebookTask: &jobs.NotebookTask{
// NotebookPath: "/Workspace/absolute/path/notebook",
// },
// },
// },
// },
// },
// },
// },
// },
// }
// ctx := context.Background()
// diags := bundle.Apply(ctx, b, ApplyPresets())
// require.Len(t, diags, 2)
// // Check notebook diagnostic
// assert.Equal(t, notebookPath, diags[0].Locations[0].File)
// assert.Equal(t, 1, diags[0].Locations[0].Line)
// assert.Equal(t, 1, diags[0].Locations[0].Column)
// // Check Python script diagnostic
// assert.Equal(t, pythonPath, diags[1].Locations[0].File)
// assert.Equal(t, 1, diags[1].Locations[0].Line)
// assert.Equal(t, 1, diags[1].Locations[0].Column)
// }

View File

@@ -13,21 +13,21 @@
"type": "string",
"default": "yes",
"enum": ["yes", "no"],
"description": "\n\nInclude a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'",
"description": "\nWould you like to include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'?",
"order": 2
},
"include_dlt": {
"type": "string",
"default": "yes",
"enum": ["yes", "no"],
"description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'",
"description": "Would you like to include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'?",
"order": 3
},
"include_python": {
"type": "string",
"default": "yes",
"enum": ["yes", "no"],
"description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
"description": "Would you like to include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'?",
"order": 4
},
"default_catalog": {

View File

@@ -27,15 +27,24 @@
},
"outputs": [],
"source": [
{{- if (eq .include_python "yes") }}
{{- if (eq .include_python "yes") }}
"import sys\n",
"sys.path.append('../src')\n",
"from {{.project_name}} import main\n",
"\n",
"main.get_taxis(spark).show(10)"
{{else}}
"spark.range(10)"
{{end -}}
"catalog = dbutils.widgets.get('catalog')\n",
"schema = dbutils.widgets.get('schema')\n",
"spark.sql(f'USE {catalog}.{schema}')\n",
"\n",
"spark.sql('SELECT * FROM example').show(10)"
{{- else}}
"# Read the 'catalog' and 'schema' widgets and use their values as the current catalog / schema\n",
"catalog = dbutils.widgets.get('catalog')\n",
"schema = dbutils.widgets.get('schema')\n",
"spark.sql(f'USE {catalog}.{schema}')\n",
"\n",
"spark.sql('SELECT * FROM example').show(10)"
{{- end}}
]
}
],
@@ -46,8 +55,63 @@
"notebookMetadata": {
"pythonIndentUnit": 2
},
"notebookName": "ipynb-notebook",
"widgets": {}
"notebookName": "exploration",
"widgets": {
"catalog": {
"currentValue": "{{.default_catalog}}",
"nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f1234",
"typedWidgetInfo": {
"autoCreated": false,
"defaultValue": "{{.default_catalog}}",
"label": null,
"name": "catalog",
"options": {
"widgetDisplayType": "Text",
"validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
"widgetType": "text",
"defaultValue": "{{.default_catalog}}",
"label": null,
"name": "catalog",
"options": {
"widgetType": "text",
"autoCreated": null,
"validationRegex": null
}
}
},
{{- $dev_schema := .shared_schema }}
{{- if (regexp "^yes").MatchString .personal_schemas}}
{{- $dev_schema = "{{short_name}}"}}
{{- end}}
"schema": {
"currentValue": "{{$dev_schema}}",
"nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f5678",
"typedWidgetInfo": {
"autoCreated": false,
"defaultValue": "{{$dev_schema}}",
"label": null,
"name": "schema",
"options": {
"widgetDisplayType": "Text",
"validationRegex": null
},
"parameterDataType": "String"
},
"widgetInfo": {
"widgetType": "text",
"defaultValue": "{{$dev_schema}}",
"label": null,
"name": "schema",
"options": {
"widgetType": "text",
"autoCreated": null,
"validationRegex": null
}
}
},
"kernelspec": {
"display_name": "Python 3",

View File

@@ -27,8 +27,8 @@ def main():
# In the default template, these parameters are set
# using the 'catalog' and 'schema' presets in databricks.yml.
parser = argparse.ArgumentParser()
parser.add_argument('--catalog', '-c', required=True)
parser.add_argument('--schema', '-s', required=True)
parser.add_argument('--catalog', required=True)
parser.add_argument('--schema', required=True)
args, unknown = parser.parse_known_args()
spark = get_spark()
spark.sql(f"USE {args.catalog}.{args.schema}")