mirror of https://github.com/databricks/cli.git
Extend catalog/schema preset support
This commit is contained in:
parent 963f5e6636
commit 2addd0c506
@@ -3,7 +3,9 @@ package mutator
 import (
 	"context"
 	"fmt"
+	"os"
 	"path"
+	"regexp"
 	"slices"
 	"sort"
 	"strings"
@@ -14,6 +16,7 @@ import (
 	"github.com/databricks/cli/libs/dbr"
 	"github.com/databricks/cli/libs/diag"
 	"github.com/databricks/cli/libs/dyn"
+	"github.com/databricks/cli/libs/log"
 	"github.com/databricks/cli/libs/textutil"
 	"github.com/databricks/databricks-sdk-go/service/catalog"
 	"github.com/databricks/databricks-sdk-go/service/jobs"
@@ -100,7 +103,7 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos
 			}
 
 			diags = diags.Extend(addCatalogSchemaParameters(b, key, j, t))
-			diags = diags.Extend(validateCatalogSchemaUsage(b, key, j))
+			diags = diags.Extend(recommendCatalogSchemaUsage(b, ctx, key, j))
 		}
 	}
 
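For orientation, a minimal sketch (hypothetical driver code, not part of this commit) of how the new recommendation diagnostics surface once this call is wired in. It assumes a bundle b configured as in the commented-out test further below, whose bundle.Apply call it mirrors:

    // Apply the presets mutator and print any Recommendation-severity
    // diagnostics produced by recommendCatalogSchemaUsage.
    ctx := context.Background()
    diags := bundle.Apply(ctx, b, ApplyPresets())
    for _, d := range diags {
    	if d.Severity == diag.Recommendation {
    		fmt.Println(d.Summary)
    	}
    }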
@@ -333,31 +336,6 @@ func validateCatalogAndSchema(b *bundle.Bundle) diag.Diagnostics {
 	}
 	return nil
 }
 
-func validateCatalogSchemaUsage(b *bundle.Bundle, key string, job *resources.Job) diag.Diagnostics {
-	for _, t := range job.Tasks {
-		if t.NotebookTask != nil {
-			// TODO: proper validation that warns
-			return diag.Diagnostics{{
-				Summary: fmt.Sprintf("job %s must read catalog and schema from parameters for notebook %s",
-					key, t.NotebookTask.NotebookPath),
-				Severity:  diag.Recommendation,
-				Locations: []dyn.Location{b.Config.GetLocation("resources.jobs." + key)},
-			}}
-		}
-	}
-
-	// TODO: validate that any notebooks actually read the catalog/schema arguments
-	// if ... {
-	// 	return diag.Diagnostics{{
-	// 		Summary: fmt.Sprintf("job %s must pass catalog and schema presets as parameters as follows:\n"+
-	// 			"  ...", key),
-	// 		Severity:  diag.Error,
-	// 		Locations: []dyn.Location{b.Config.GetLocation("resources.jobs." + key)},
-	// 	}}
-	// }
-
-	return nil
-}
 // toTagArray converts a map of tags to an array of tags.
 // We sort tags to ensure stable ordering.
@@ -443,3 +421,86 @@ func addCatalogSchemaParameters(b *bundle.Bundle, key string, job *resources.Job
 
 	return diags
 }
+
+func recommendCatalogSchemaUsage(b *bundle.Bundle, ctx context.Context, key string, job *resources.Job) diag.Diagnostics {
+	var diags diag.Diagnostics
+	for _, t := range job.Tasks {
+		var relPath string
+		var expected string
+		var fix string
+		if t.NotebookTask != nil {
+			relPath = t.NotebookTask.NotebookPath
+			expected = `dbutils\.widgets\.text\(['"]schema|` +
+				`USE[^)]+schema`
+			fix = "  dbutils.widgets.text('catalog')\n" +
+				"  dbutils.widgets.text('schema')\n" +
+				"  catalog = dbutils.widgets.get('catalog')\n" +
+				"  schema = dbutils.widgets.get('schema')\n" +
+				"  spark.sql(f'USE {catalog}.{schema}')\n"
+		} else if t.SparkPythonTask != nil {
+			relPath = t.SparkPythonTask.PythonFile
+			expected = `add_argument\(['"]--catalog['"]|` +
+				`USE[^)]+catalog`
+			fix = "  def main():\n" +
+				"    parser = argparse.ArgumentParser()\n" +
+				"    parser.add_argument('--catalog', required=True)\n" +
+				"    parser.add_argument('--schema', required=True)\n" +
+				"    args, unknown = parser.parse_known_args()\n" +
+				"    spark.sql(f\"USE {args.catalog}.{args.schema}\")\n"
+		} else if t.SqlTask != nil && t.SqlTask.File != nil {
+			relPath = t.SqlTask.File.Path
+			expected = `:schema|\{\{schema\}\}`
+			fix = "  USE CATALOG {{catalog}};\n" +
+				"  USE IDENTIFIER({{schema}});\n"
+		} else {
+			continue
+		}
+
+		sourceDir, err := b.Config.GetLocation("resources.jobs." + key).Directory()
+		if err != nil {
+			continue
+		}
+
+		localPath, _, err := GetLocalPath(ctx, b, sourceDir, relPath)
+		if err != nil {
+			// Any path errors are reported by another mutator
+			continue
+		}
+		if localPath == "" {
+			// If there is no local copy, we don't want to download it; skip this check
+			continue
+		}
+
+		log.Warnf(ctx, "LocalPath: %s, relPath: %s, sourceDir: %s", localPath, relPath, sourceDir)
+
+		if !fileIncludesPattern(ctx, localPath, expected) {
+			diags = diags.Extend(diag.Diagnostics{{
+				Summary: fmt.Sprintf("Use the 'catalog' and 'schema' parameters provided via 'presets.catalog' and 'presets.schema' as follows:\n\n%s",
+					fix),
+				Severity: diag.Recommendation,
+				Locations: []dyn.Location{{
+					File:   localPath,
+					Line:   1,
+					Column: 1,
+				}},
+			}})
+		}
+	}
+
+	return diags
+}
+
+func fileIncludesPattern(ctx context.Context, filePath string, expected string) bool {
+	content, err := os.ReadFile(filePath)
+	if err != nil {
+		log.Warnf(ctx, "failed to check file %s: %v", filePath, err)
+		return true
+	}
+
+	matched, err := regexp.MatchString(expected, string(content))
+	if err != nil {
+		log.Warnf(ctx, "failed to check pattern in %s: %v", filePath, err)
+		return true
+	}
+	return matched
+}
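A standalone sketch of how the detection patterns above behave. The regex literals mirror the expected values built in recommendCatalogSchemaUsage; the sample contents are hypothetical task sources, not files from this repository:

    package main

    import (
    	"fmt"
    	"regexp"
    )

    func main() {
    	notebookPattern := `dbutils\.widgets\.text\(['"]schema|USE[^)]+schema`
    	sqlPattern := `:schema|\{\{schema\}\}`

    	// A notebook that declares a schema widget matches the first alternative.
    	fmt.Println(regexp.MustCompile(notebookPattern).MatchString(
    		`dbutils.widgets.text('schema')`)) // true
    	// A notebook that only issues USE <catalog>.<schema> matches the second.
    	fmt.Println(regexp.MustCompile(notebookPattern).MatchString(
    		`spark.sql(f'USE {catalog}.{schema}')`)) // true
    	// A SQL file with a mustache placeholder matches the SQL pattern.
    	fmt.Println(regexp.MustCompile(sqlPattern).MatchString(
    		`USE IDENTIFIER({{schema}});`)) // true
    	// A file that never references the schema triggers the recommendation.
    	fmt.Println(regexp.MustCompile(notebookPattern).MatchString(
    		`spark.range(10)`)) // false
    }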
@@ -453,3 +453,53 @@ func TestApplyPresetsSourceLinkedDeployment(t *testing.T) {
 	}
 
 }
+
+// func TestApplyPresetsRecommendsCatalogSchemaUsage(t *testing.T) {
+// 	dir := t.TempDir()
+
+// 	...
+
+// 	b := &bundle.Bundle{
+// 		Config: config.Root{
+// 			Resources: config.Resources{
+// 				Jobs: map[string]*resources.Job{
+// 					"job1": {
+// 						JobSettings: &jobs.JobSettings{
+// 							Tasks: []jobs.Task{
+// 								{
+// 									NotebookTask: &jobs.NotebookTask{
+// 										NotebookPath: notebookPath,
+// 									},
+// 								},
+// 								{
+// 									SparkPythonTask: &jobs.SparkPythonTask{
+// 										PythonFile: pythonPath,
+// 									},
+// 								},
+// 								{
+// 									NotebookTask: &jobs.NotebookTask{
+// 										NotebookPath: "/Workspace/absolute/path/notebook",
+// 									},
+// 								},
+// 							},
+// 						},
+// 					},
+// 				},
+// 			},
+// 		},
+// 	}
+
+// 	ctx := context.Background()
+// 	diags := bundle.Apply(ctx, b, ApplyPresets())
+// 	require.Len(t, diags, 2)
+
+// 	// Check notebook diagnostic
+// 	assert.Equal(t, notebookPath, diags[0].Locations[0].File)
+// 	assert.Equal(t, 1, diags[0].Locations[0].Line)
+// 	assert.Equal(t, 1, diags[0].Locations[0].Column)
+
+// 	// Check Python script diagnostic
+// 	assert.Equal(t, pythonPath, diags[1].Locations[0].File)
+// 	assert.Equal(t, 1, diags[1].Locations[0].Line)
+// 	assert.Equal(t, 1, diags[1].Locations[0].Column)
+// }
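The // ... above elides the fixture setup; a hypothetical sketch of what it would need to create (local task sources that never read catalog or schema, so both diagnostics fire; all paths and contents here are assumptions, not part of this commit):

    // Hypothetical fixture for the commented-out test.
    notebookPath := filepath.Join(dir, "src", "notebook.py")
    pythonPath := filepath.Join(dir, "src", "file.py")
    require.NoError(t, os.MkdirAll(filepath.Dir(notebookPath), 0o755))
    require.NoError(t, os.WriteFile(notebookPath,
    	[]byte("# Databricks notebook source\nspark.range(10)\n"), 0o644))
    require.NoError(t, os.WriteFile(pythonPath,
    	[]byte("print('no catalog or schema here')\n"), 0o644))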
@@ -13,21 +13,21 @@
       "type": "string",
       "default": "yes",
       "enum": ["yes", "no"],
-      "description": "\n\nInclude a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'",
+      "description": "\nWould you like to include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'?",
       "order": 2
     },
     "include_dlt": {
       "type": "string",
       "default": "yes",
       "enum": ["yes", "no"],
-      "description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'",
+      "description": "Would you like to include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'?",
       "order": 3
     },
     "include_python": {
       "type": "string",
       "default": "yes",
       "enum": ["yes", "no"],
-      "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
+      "description": "Would you like to include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'?",
       "order": 4
     },
     "default_catalog": {
@@ -27,15 +27,24 @@
    },
    "outputs": [],
    "source": [
-{{- if (eq .include_python "yes") }}
+{{- if (eq .include_python "yes") }}
     "import sys\n",
     "sys.path.append('../src')\n",
     "from {{.project_name}} import main\n",
     "\n",
-    "main.get_taxis(spark).show(10)"
-{{else}}
-    "spark.range(10)"
-{{end -}}
+    "catalog = dbutils.widgets.get('catalog')\n",
+    "schema = dbutils.widgets.get('schema')\n",
+    "spark.sql(f'USE {catalog}.{schema}')\n",
+    "\n",
+    "spark.sql('SELECT * FROM example').show(10)"
+{{- else}}
+    "# Load the default catalog and schema from widgets and set them as the session defaults\n",
+    "catalog = dbutils.widgets.get('catalog')\n",
+    "schema = dbutils.widgets.get('schema')\n",
+    "spark.sql(f'USE {catalog}.{schema}')\n",
+    "\n",
+    "spark.sql('SELECT * FROM example').show(10)"
+{{- end}}
    ]
   }
  ],
@@ -46,8 +55,63 @@
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
-  "notebookName": "ipynb-notebook",
-  "widgets": {}
+  "notebookName": "exploration",
+  "widgets": {
+   "catalog": {
+    "currentValue": "{{.default_catalog}}",
+    "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f1234",
+    "typedWidgetInfo": {
+     "autoCreated": false,
+     "defaultValue": "{{.default_catalog}}",
+     "label": null,
+     "name": "catalog",
+     "options": {
+      "widgetDisplayType": "Text",
+      "validationRegex": null
+     },
+     "parameterDataType": "String"
+    },
+    "widgetInfo": {
+     "widgetType": "text",
+     "defaultValue": "{{.default_catalog}}",
+     "label": null,
+     "name": "catalog",
+     "options": {
+      "widgetType": "text",
+      "autoCreated": null,
+      "validationRegex": null
+     }
+    }
+   },
+{{- $dev_schema := .shared_schema }}
+{{- if (regexp "^yes").MatchString .personal_schemas}}
+{{- $dev_schema = "{{short_name}}"}}
+{{- end}}
+   "schema": {
+    "currentValue": "{{$dev_schema}}",
+    "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f5678",
+    "typedWidgetInfo": {
+     "autoCreated": false,
+     "defaultValue": "{{$dev_schema}}",
+     "label": null,
+     "name": "schema",
+     "options": {
+      "widgetDisplayType": "Text",
+      "validationRegex": null
+     },
+     "parameterDataType": "String"
+    },
+    "widgetInfo": {
+     "widgetType": "text",
+     "defaultValue": "{{$dev_schema}}",
+     "label": null,
+     "name": "schema",
+     "options": {
+      "widgetType": "text",
+      "autoCreated": null,
+      "validationRegex": null
+     }
+    }
+   }
+  },
  "kernelspec": {
   "display_name": "Python 3",
@@ -27,8 +27,8 @@ def main():
     # In the default template, these parameters are set
     # using the 'catalog' and 'schema' presets in databricks.yml.
     parser = argparse.ArgumentParser()
-    parser.add_argument('--catalog', '-c', required=True)
-    parser.add_argument('--schema', '-s', required=True)
+    parser.add_argument('--catalog', required=True)
+    parser.add_argument('--schema', required=True)
     args, unknown = parser.parse_known_args()
     spark = get_spark()
     spark.sql(f"USE {args.catalog}.{args.schema}")