Extend catalog/schema preset support

Lennart Kats 2024-12-06 22:12:42 +01:00
parent 963f5e6636
commit 2addd0c506
No known key found for this signature in database
GPG Key ID: 1EB8B57673197023
5 changed files with 213 additions and 38 deletions

View File

@@ -3,7 +3,9 @@ package mutator
 import (
     "context"
     "fmt"
+    "os"
     "path"
+    "regexp"
     "slices"
     "sort"
     "strings"
@@ -14,6 +16,7 @@ import (
     "github.com/databricks/cli/libs/dbr"
     "github.com/databricks/cli/libs/diag"
     "github.com/databricks/cli/libs/dyn"
+    "github.com/databricks/cli/libs/log"
     "github.com/databricks/cli/libs/textutil"
     "github.com/databricks/databricks-sdk-go/service/catalog"
     "github.com/databricks/databricks-sdk-go/service/jobs"
@@ -100,7 +103,7 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
             }
             diags = diags.Extend(addCatalogSchemaParameters(b, key, j, t))
-            diags = diags.Extend(validateCatalogSchemaUsage(b, key, j))
+            diags = diags.Extend(recommendCatalogSchemaUsage(b, ctx, key, j))
         }
     }
@@ -333,31 +336,6 @@ func validateCatalogAndSchema(b *bundle.Bundle) diag.Diagnostics {
     }
     return nil
 }
-
-func validateCatalogSchemaUsage(b *bundle.Bundle, key string, job *resources.Job) diag.Diagnostics {
-    for _, t := range job.Tasks {
-        if t.NotebookTask != nil {
-            // TODO: proper validation that warns
-            return diag.Diagnostics{{
-                Summary: fmt.Sprintf("job %s must read catalog and schema from parameters for notebook %s",
-                    key, t.NotebookTask.NotebookPath),
-                Severity:  diag.Recommendation,
-                Locations: []dyn.Location{b.Config.GetLocation("resources.jobs." + key)},
-            }}
-        }
-    }
-
-    // TODO: validate that any notebooks actually read the catalog/schema arguments
-    // if ... {
-    //     return diag.Diagnostics{{
-    //         Summary: fmt.Sprintf("job %s must pass catalog and schema presets as parameters as follows:\n"+
-    //             "  ...", key),
-    //         Severity:  diag.Error,
-    //         Locations: []dyn.Location{b.Config.GetLocation("resources.jobs." + key)},
-    //     }}
-    // }
-    return nil
-}
 
 // toTagArray converts a map of tags to an array of tags.
 // We sort tags so ensure stable ordering.
@@ -443,3 +421,86 @@ func addCatalogSchemaParameters(b *bundle.Bundle, key string, job *resources.Job
     return diags
 }
+
+func recommendCatalogSchemaUsage(b *bundle.Bundle, ctx context.Context, key string, job *resources.Job) diag.Diagnostics {
+    var diags diag.Diagnostics
+    for _, t := range job.Tasks {
+        var relPath string
+        var expected string
+        var fix string
+        if t.NotebookTask != nil {
+            relPath = t.NotebookTask.NotebookPath
+            expected = `dbutils.widgets.text\(['"]schema|` +
+                `USE[^)]+schema`
+            fix = "  dbutils.widgets.text('catalog')\n" +
+                "  dbutils.widgets.text('schema')\n" +
+                "  catalog = dbutils.widgets.get('catalog')\n" +
+                "  schema = dbutils.widgets.get('schema')\n" +
+                "  spark.sql(f'USE {catalog}.{schema}')\n"
+        } else if t.SparkPythonTask != nil {
+            relPath = t.SparkPythonTask.PythonFile
+            expected = `add_argument\(['"]--catalog'|` +
+                `USE[^)]+catalog`
+            fix = "  def main():\n" +
+                "    parser = argparse.ArgumentParser()\n" +
+                "    parser.add_argument('--catalog', required=True)\n" +
+                "    parser.add_argument('--schema', '-s', required=True)\n" +
+                "    args, unknown = parser.parse_known_args()\n" +
+                "    spark.sql(f\"USE {args.catalog}.{args.schema}\")\n"
+        } else if t.SqlTask != nil && t.SqlTask.File != nil {
+            relPath = t.SqlTask.File.Path
+            expected = `:schema|\{\{schema\}\}`
+            fix = "  USE CATALOG {{catalog}};\n" +
+                "  USE IDENTIFIER({schema});\n"
+        } else {
+            continue
+        }
+
+        sourceDir, err := b.Config.GetLocation("resources.jobs." + key).Directory()
+        if err != nil {
+            continue
+        }
+
+        localPath, _, err := GetLocalPath(ctx, b, sourceDir, relPath)
+        if err != nil {
+            // Any path errors are reported by another mutator
+            continue
+        }
+
+        if localPath == "" {
+            // If there is no local copy we don't want to download it and skip this check
+            continue
+        }
+        log.Warnf(ctx, "LocalPath: %s, relPath: %s, sourceDir: %s", localPath, relPath, sourceDir)
+
+        if !fileIncludesPattern(ctx, localPath, expected) {
+            diags = diags.Extend(diag.Diagnostics{{
+                Summary: fmt.Sprintf("Use the 'catalog' and 'schema' parameters provided via 'presets.catalog' and 'presets.schema' using\n\n" +
+                    fix),
+                Severity: diag.Recommendation,
+                Locations: []dyn.Location{{
+                    File:   localPath,
+                    Line:   1,
+                    Column: 1,
+                }},
+            }})
+        }
+    }
+    return diags
+}
+
+func fileIncludesPattern(ctx context.Context, filePath string, expected string) bool {
+    content, err := os.ReadFile(filePath)
+    if err != nil {
+        log.Warnf(ctx, "failed to check file %s: %v", filePath, err)
+        return true
+    }
+
+    matched, err := regexp.MatchString(expected, string(content))
+    if err != nil {
+        log.Warnf(ctx, "failed to check pattern in %s: %v", filePath, err)
+        return true
+    }
+    return matched
+}
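
The new check boils down to a regexp scan over the task's local source file. Below is a minimal standalone sketch of that scan outside the bundle machinery; the file name and the recommendation text are illustrative assumptions, and the pattern is the notebook-task pattern as reconstructed above.

    package main

    import (
        "fmt"
        "os"
        "regexp"
    )

    func main() {
        // Hypothetical local copy of a notebook task's source.
        content, err := os.ReadFile("notebook.py")
        if err != nil {
            fmt.Println("could not read file, skipping check:", err)
            return
        }

        // Same idea as the 'expected' pattern for notebook tasks above.
        pattern := `dbutils.widgets.text\(['"]schema|USE[^)]+schema`
        matched, err := regexp.MatchString(pattern, string(content))
        if err != nil {
            fmt.Println("invalid pattern:", err)
            return
        }
        if !matched {
            fmt.Println("recommendation: read the 'catalog' and 'schema' widgets and run USE {catalog}.{schema}")
        }
    }

Returning true on read or compile errors, as fileIncludesPattern does, means the recommendation is suppressed rather than emitted spuriously when the file cannot be inspected.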

View File

@@ -453,3 +453,53 @@ func TestApplyPresetsSourceLinkedDeployment(t *testing.T) {
     }
 }
+
+// func TestApplyPresetsRecommendsCatalogSchemaUsage(t *testing.T) {
+//     dir := t.TempDir()
+//
+//     ...
+//
+//     b := &bundle.Bundle{
+//         Config: config.Root{
+//             Resources: config.Resources{
+//                 Jobs: map[string]*resources.Job{
+//                     "job1": {
+//                         JobSettings: &jobs.JobSettings{
+//                             Tasks: []jobs.Task{
+//                                 {
+//                                     NotebookTask: &jobs.NotebookTask{
+//                                         NotebookPath: notebookPath,
+//                                     },
+//                                 },
+//                                 {
+//                                     SparkPythonTask: &jobs.SparkPythonTask{
+//                                         PythonFile: pythonPath,
+//                                     },
+//                                 },
+//                                 {
+//                                     NotebookTask: &jobs.NotebookTask{
+//                                         NotebookPath: "/Workspace/absolute/path/notebook",
+//                                     },
+//                                 },
+//                             },
+//                         },
+//                     },
+//                 },
+//             },
+//         },
+//     }
+//
+//     ctx := context.Background()
+//     diags := bundle.Apply(ctx, b, ApplyPresets())
+//     require.Len(t, diags, 2)
+//
+//     // Check notebook diagnostic
+//     assert.Equal(t, notebookPath, diags[0].Locations[0].File)
+//     assert.Equal(t, 1, diags[0].Locations[0].Line)
+//     assert.Equal(t, 1, diags[0].Locations[0].Column)
+//
+//     // Check Python script diagnostic
+//     assert.Equal(t, pythonPath, diags[1].Locations[0].File)
+//     assert.Equal(t, 1, diags[1].Locations[0].Line)
+//     assert.Equal(t, 1, diags[1].Locations[0].Column)
+// }
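
Before the commented-out test above can be enabled, it needs local fixture files behind notebookPath and pythonPath. One possible setup is sketched below; the test name, file names, and file contents are assumptions, not part of the commit.

    package mutator_test

    import (
        "os"
        "path/filepath"
        "testing"

        "github.com/stretchr/testify/require"
    )

    func TestApplyPresetsCatalogSchemaFixtures(t *testing.T) {
        dir := t.TempDir()

        // A notebook that does NOT read the catalog/schema widgets,
        // so the notebook task should trigger a recommendation.
        notebookPath := filepath.Join(dir, "notebook.py")
        require.NoError(t, os.WriteFile(notebookPath, []byte("# Databricks notebook source\nprint('hello')\n"), 0o644))

        // A Python script without the argparse --catalog/--schema handling.
        pythonPath := filepath.Join(dir, "main.py")
        require.NoError(t, os.WriteFile(pythonPath, []byte("print('hello')\n"), 0o644))

        _ = notebookPath
        _ = pythonPath
        // From here, build the bundle as in the commented-out test above
        // and run bundle.Apply(ctx, b, ApplyPresets()).
    }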

View File

@@ -13,21 +13,21 @@
       "type": "string",
       "default": "yes",
       "enum": ["yes", "no"],
-      "description": "\n\nInclude a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'",
+      "description": "\nWould you like to include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'?",
       "order": 2
     },
     "include_dlt": {
       "type": "string",
       "default": "yes",
       "enum": ["yes", "no"],
-      "description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'",
+      "description": "Would you like to include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'?",
       "order": 3
     },
     "include_python": {
       "type": "string",
       "default": "yes",
       "enum": ["yes", "no"],
-      "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
+      "description": "Would you like to include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'?",
       "order": 4
     },
     "default_catalog": {

View File

@@ -32,10 +32,19 @@
     "sys.path.append('../src')\n",
     "from {{.project_name}} import main\n",
     "\n",
-    "main.get_taxis(spark).show(10)"
-{{else}}
-    "spark.range(10)"
-{{end -}}
+    "catalog = dbutils.widgets.get('catalog')\n",
+    "schema = dbutils.widgets.get('schema')\n",
+    "spark.sql(f'USE {catalog}.{schema}')\n",
+    "\n",
+    "spark.sql('SELECT * FROM example').show(10)"
+{{- else}}
+    "# Load default catalog and schema as widget and set their values as the default catalog / schema\n",
+    "catalog = dbutils.widgets.get('catalog')\n",
+    "schema = dbutils.widgets.get('schema')\n",
+    "spark.sql(f'USE {catalog}.{schema}')\n",
+    "\n",
+    "spark.sql('SELECT * FROM example').show(10)"
+{{- end}}
    ]
   }
  ],
@@ -46,8 +55,65 @@
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
-  "notebookName": "ipynb-notebook",
-  "widgets": {}
+  "notebookName": "exploration",
+  "widgets": {
+   "catalog": {
+    "currentValue": "{{.default_catalog}}",
+    "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f1234",
+    "typedWidgetInfo": {
+     "autoCreated": false,
+     "defaultValue": "{{.default_catalog}}",
+     "label": null,
+     "name": "catalog",
+     "options": {
+      "widgetDisplayType": "Text",
+      "validationRegex": null
+     },
+     "parameterDataType": "String"
+    },
+    "widgetInfo": {
+     "widgetType": "text",
+     "defaultValue": "{{.default_catalog}}",
+     "label": null,
+     "name": "catalog",
+     "options": {
+      "widgetType": "text",
+      "autoCreated": null,
+      "validationRegex": null
+     }
+    }
+   },
+{{- $dev_schema := .shared_schema }}
+{{- if (regexp "^yes").MatchString .personal_schemas}}
+{{- $dev_schema = "{{short_name}}"}}
+{{- end}}
+   "schema": {
+    "currentValue": "{{$dev_schema}}",
+    "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f5678",
+    "typedWidgetInfo": {
+     "autoCreated": false,
+     "defaultValue": "{{$dev_schema}}",
+     "label": null,
+     "name": "schema",
+     "options": {
+      "widgetDisplayType": "Text",
+      "validationRegex": null
+     },
+     "parameterDataType": "String"
+    },
+    "widgetInfo": {
+     "widgetType": "text",
+     "defaultValue": "{{$dev_schema}}",
+     "label": null,
+     "name": "schema",
+     "options": {
+      "widgetType": "text",
+      "autoCreated": null,
+      "validationRegex": null
+     }
+    }
+   }
+  }
  },
  "kernelspec": {
   "display_name": "Python 3",

View File

@@ -27,8 +27,8 @@ def main():
     # In the default template, these parameters are set
     # using the 'catalog' and 'schema' presets in databricks.yml.
     parser = argparse.ArgumentParser()
-    parser.add_argument('--catalog', '-c', required=True)
-    parser.add_argument('--schema', '-s', required=True)
+    parser.add_argument('--catalog', required=True)
+    parser.add_argument('--schema', required=True)
     args, unknown = parser.parse_known_args()
     spark = get_spark()
     spark.sql(f"USE {args.catalog}.{args.schema}")