mirror of https://github.com/databricks/cli.git
Extend catalog/schema preset support
This commit is contained in:
parent
963f5e6636
commit
2addd0c506
|
@ -3,7 +3,9 @@ package mutator
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
"path"
|
"path"
|
||||||
|
"regexp"
|
||||||
"slices"
|
"slices"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -14,6 +16,7 @@ import (
|
||||||
"github.com/databricks/cli/libs/dbr"
|
"github.com/databricks/cli/libs/dbr"
|
||||||
"github.com/databricks/cli/libs/diag"
|
"github.com/databricks/cli/libs/diag"
|
||||||
"github.com/databricks/cli/libs/dyn"
|
"github.com/databricks/cli/libs/dyn"
|
||||||
|
"github.com/databricks/cli/libs/log"
|
||||||
"github.com/databricks/cli/libs/textutil"
|
"github.com/databricks/cli/libs/textutil"
|
||||||
"github.com/databricks/databricks-sdk-go/service/catalog"
|
"github.com/databricks/databricks-sdk-go/service/catalog"
|
||||||
"github.com/databricks/databricks-sdk-go/service/jobs"
|
"github.com/databricks/databricks-sdk-go/service/jobs"
|
||||||
|
@ -100,7 +103,7 @@ func (m *applyPresets) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnos
|
||||||
}
|
}
|
||||||
|
|
||||||
diags = diags.Extend(addCatalogSchemaParameters(b, key, j, t))
|
diags = diags.Extend(addCatalogSchemaParameters(b, key, j, t))
|
||||||
diags = diags.Extend(validateCatalogSchemaUsage(b, key, j))
|
diags = diags.Extend(recommendCatalogSchemaUsage(b, ctx, key, j))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -333,31 +336,6 @@ func validateCatalogAndSchema(b *bundle.Bundle) diag.Diagnostics {
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
func validateCatalogSchemaUsage(b *bundle.Bundle, key string, job *resources.Job) diag.Diagnostics {
|
|
||||||
for _, t := range job.Tasks {
|
|
||||||
if t.NotebookTask != nil {
|
|
||||||
// TODO: proper validation that warns
|
|
||||||
return diag.Diagnostics{{
|
|
||||||
Summary: fmt.Sprintf("job %s must read catalog and schema from parameters for notebook %s",
|
|
||||||
key, t.NotebookTask.NotebookPath),
|
|
||||||
Severity: diag.Recommendation,
|
|
||||||
Locations: []dyn.Location{b.Config.GetLocation("resources.jobs." + key)},
|
|
||||||
}}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: validate that any notebooks actually read the catalog/schema arguments
|
|
||||||
// if ... {
|
|
||||||
// return diag.Diagnostics{{
|
|
||||||
// Summary: fmt.Sprintf("job %s must pass catalog and schema presets as parameters as follows:\n"+
|
|
||||||
// " ...", key),
|
|
||||||
// Severity: diag.Error,
|
|
||||||
// Locations: []dyn.Location{b.Config.GetLocation("resources.jobs." + key)},
|
|
||||||
// }}
|
|
||||||
// }
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// toTagArray converts a map of tags to an array of tags.
|
// toTagArray converts a map of tags to an array of tags.
|
||||||
// We sort tags so ensure stable ordering.
|
// We sort tags so ensure stable ordering.
|
||||||
|
@ -443,3 +421,86 @@ func addCatalogSchemaParameters(b *bundle.Bundle, key string, job *resources.Job
|
||||||
|
|
||||||
return diags
|
return diags
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func recommendCatalogSchemaUsage(b *bundle.Bundle, ctx context.Context, key string, job *resources.Job) diag.Diagnostics {
|
||||||
|
var diags diag.Diagnostics
|
||||||
|
for _, t := range job.Tasks {
|
||||||
|
var relPath string
|
||||||
|
var expected string
|
||||||
|
var fix string
|
||||||
|
if t.NotebookTask != nil {
|
||||||
|
relPath = t.NotebookTask.NotebookPath
|
||||||
|
expected = `" dbutils.widgets.text(['"]schema|` +
|
||||||
|
`USE[^)]+schema`
|
||||||
|
fix = " dbutils.widgets.text('catalog')\n" +
|
||||||
|
" dbutils.widgets.text('schema')\n" +
|
||||||
|
" catalog = dbutils.widgets.get('catalog')\n" +
|
||||||
|
" schema = dbutils.widgets.get('schema')\n" +
|
||||||
|
" spark.sql(f'USE {catalog}.{schema}')\n"
|
||||||
|
} else if t.SparkPythonTask != nil {
|
||||||
|
relPath = t.SparkPythonTask.PythonFile
|
||||||
|
expected = `add_argument\(['"]--catalog'|` +
|
||||||
|
`USE[^)]+catalog`
|
||||||
|
fix = " def main():\n" +
|
||||||
|
" parser = argparse.ArgumentParser()\n" +
|
||||||
|
" parser.add_argument('--catalog', required=True)\n" +
|
||||||
|
" parser.add_argument('--schema', '-s', required=True)\n" +
|
||||||
|
" args, unknown = parser.parse_known_args()\n" +
|
||||||
|
" spark.sql(f\"USE {args.catalog}.{args.schema}\")\n"
|
||||||
|
} else if t.SqlTask != nil && t.SqlTask.File != nil {
|
||||||
|
relPath = t.SqlTask.File.Path
|
||||||
|
expected = `:schema|\{\{schema\}\}`
|
||||||
|
fix = " USE CATALOG {{catalog}};\n" +
|
||||||
|
" USE IDENTIFIER({schema});\n"
|
||||||
|
} else {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
sourceDir, err := b.Config.GetLocation("resources.jobs." + key).Directory()
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
localPath, _, err := GetLocalPath(ctx, b, sourceDir, relPath)
|
||||||
|
if err != nil {
|
||||||
|
// Any path errors are reported by another mutator
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if localPath == "" {
|
||||||
|
// If there is no local copy we don't want to download it and skip this check
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Warnf(ctx, "LocalPath: %s, relPath: %s, sourceDir: %s", localPath, relPath, sourceDir)
|
||||||
|
|
||||||
|
if !fileIncludesPattern(ctx, localPath, expected) {
|
||||||
|
diags = diags.Extend(diag.Diagnostics{{
|
||||||
|
Summary: fmt.Sprintf("Use the 'catalog' and 'schema' parameters provided via 'presets.catalog' and 'presets.schema' using\n\n" +
|
||||||
|
fix),
|
||||||
|
Severity: diag.Recommendation,
|
||||||
|
Locations: []dyn.Location{{
|
||||||
|
File: localPath,
|
||||||
|
Line: 1,
|
||||||
|
Column: 1,
|
||||||
|
}},
|
||||||
|
}})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return diags
|
||||||
|
}
|
||||||
|
|
||||||
|
func fileIncludesPattern(ctx context.Context, filePath string, expected string) bool {
|
||||||
|
content, err := os.ReadFile(filePath)
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf(ctx, "failed to check file %s: %v", filePath, err)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
matched, err := regexp.MatchString(expected, string(content))
|
||||||
|
if err != nil {
|
||||||
|
log.Warnf(ctx, "failed to check pattern in %s: %v", filePath, err)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return matched
|
||||||
|
}
|
||||||
|
|
|
@ -453,3 +453,53 @@ func TestApplyPresetsSourceLinkedDeployment(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// func TestApplyPresetsRecommendsCatalogSchemaUsage(t *testing.T) {
|
||||||
|
// dir := t.TempDir()
|
||||||
|
|
||||||
|
// ...
|
||||||
|
|
||||||
|
// b := &bundle.Bundle{
|
||||||
|
// Config: config.Root{
|
||||||
|
// Resources: config.Resources{
|
||||||
|
// Jobs: map[string]*resources.Job{
|
||||||
|
// "job1": {
|
||||||
|
// JobSettings: &jobs.JobSettings{
|
||||||
|
// Tasks: []jobs.Task{
|
||||||
|
// {
|
||||||
|
// NotebookTask: &jobs.NotebookTask{
|
||||||
|
// NotebookPath: notebookPath,
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// SparkPythonTask: &jobs.SparkPythonTask{
|
||||||
|
// PythonFile: pythonPath,
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
// {
|
||||||
|
// NotebookTask: &jobs.NotebookTask{
|
||||||
|
// NotebookPath: "/Workspace/absolute/path/notebook",
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
// },
|
||||||
|
// }
|
||||||
|
|
||||||
|
// ctx := context.Background()
|
||||||
|
// diags := bundle.Apply(ctx, b, ApplyPresets())
|
||||||
|
// require.Len(t, diags, 2)
|
||||||
|
|
||||||
|
// // Check notebook diagnostic
|
||||||
|
// assert.Equal(t, notebookPath, diags[0].Locations[0].File)
|
||||||
|
// assert.Equal(t, 1, diags[0].Locations[0].Line)
|
||||||
|
// assert.Equal(t, 1, diags[0].Locations[0].Column)
|
||||||
|
|
||||||
|
// // Check Python script diagnostic
|
||||||
|
// assert.Equal(t, pythonPath, diags[1].Locations[0].File)
|
||||||
|
// assert.Equal(t, 1, diags[1].Locations[0].Line)
|
||||||
|
// assert.Equal(t, 1, diags[1].Locations[0].Column)
|
||||||
|
// }
|
||||||
|
|
|
@ -13,21 +13,21 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "yes",
|
"default": "yes",
|
||||||
"enum": ["yes", "no"],
|
"enum": ["yes", "no"],
|
||||||
"description": "\n\nInclude a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'",
|
"description": "\nWould you like to include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'?",
|
||||||
"order": 2
|
"order": 2
|
||||||
},
|
},
|
||||||
"include_dlt": {
|
"include_dlt": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "yes",
|
"default": "yes",
|
||||||
"enum": ["yes", "no"],
|
"enum": ["yes", "no"],
|
||||||
"description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'",
|
"description": "Would you like to include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'?",
|
||||||
"order": 3
|
"order": 3
|
||||||
},
|
},
|
||||||
"include_python": {
|
"include_python": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"default": "yes",
|
"default": "yes",
|
||||||
"enum": ["yes", "no"],
|
"enum": ["yes", "no"],
|
||||||
"description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
|
"description": "Would you like to include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'?",
|
||||||
"order": 4
|
"order": 4
|
||||||
},
|
},
|
||||||
"default_catalog": {
|
"default_catalog": {
|
||||||
|
|
|
@ -27,15 +27,24 @@
|
||||||
},
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
{{- if (eq .include_python "yes") }}
|
{{- if (eq .include_python "yes") }}
|
||||||
"import sys\n",
|
"import sys\n",
|
||||||
"sys.path.append('../src')\n",
|
"sys.path.append('../src')\n",
|
||||||
"from {{.project_name}} import main\n",
|
"from {{.project_name}} import main\n",
|
||||||
"\n",
|
"\n",
|
||||||
"main.get_taxis(spark).show(10)"
|
"catalog = dbutils.widgets.get('catalog')\n",
|
||||||
{{else}}
|
"schema = dbutils.widgets.get('schema')\n",
|
||||||
"spark.range(10)"
|
"spark.sql(f'USE {catalog}.{schema}')\n",
|
||||||
{{end -}}
|
"\n",
|
||||||
|
"spark.sql('SELECT * FROM example').show(10)"
|
||||||
|
{{- else}}
|
||||||
|
"# Load default catalog and schema as widget and set their values as the default catalog / schema\n",
|
||||||
|
"catalog = dbutils.widgets.get('catalog')\n",
|
||||||
|
"schema = dbutils.widgets.get('schema')\n",
|
||||||
|
"spark.sql(f'USE {catalog}.{schema}')\n",
|
||||||
|
"\n",
|
||||||
|
"spark.sql('SELECT * FROM example').show(10)"
|
||||||
|
{{- end}}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -46,8 +55,63 @@
|
||||||
"notebookMetadata": {
|
"notebookMetadata": {
|
||||||
"pythonIndentUnit": 2
|
"pythonIndentUnit": 2
|
||||||
},
|
},
|
||||||
"notebookName": "ipynb-notebook",
|
"notebookName": "exploration",
|
||||||
"widgets": {}
|
"widgets": {
|
||||||
|
"catalog": {
|
||||||
|
"currentValue": "{{.default_catalog}}",
|
||||||
|
"nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f1234",
|
||||||
|
"typedWidgetInfo": {
|
||||||
|
"autoCreated": false,
|
||||||
|
"defaultValue": "{{.default_catalog}}",
|
||||||
|
"label": null,
|
||||||
|
"name": "catalog",
|
||||||
|
"options": {
|
||||||
|
"widgetDisplayType": "Text",
|
||||||
|
"validationRegex": null
|
||||||
|
},
|
||||||
|
"parameterDataType": "String"
|
||||||
|
},
|
||||||
|
"widgetInfo": {
|
||||||
|
"widgetType": "text",
|
||||||
|
"defaultValue": "{{.default_catalog}}",
|
||||||
|
"label": null,
|
||||||
|
"name": "catalog",
|
||||||
|
"options": {
|
||||||
|
"widgetType": "text",
|
||||||
|
"autoCreated": null,
|
||||||
|
"validationRegex": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{{- $dev_schema := .shared_schema }}
|
||||||
|
{{- if (regexp "^yes").MatchString .personal_schemas}}
|
||||||
|
{{- $dev_schema = "{{short_name}}"}}
|
||||||
|
{{- end}}
|
||||||
|
"schema": {
|
||||||
|
"currentValue": "{{$dev_schema}}",
|
||||||
|
"nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f5678",
|
||||||
|
"typedWidgetInfo": {
|
||||||
|
"autoCreated": false,
|
||||||
|
"defaultValue": "{{$dev_schema}}",
|
||||||
|
"label": null,
|
||||||
|
"name": "schema",
|
||||||
|
"options": {
|
||||||
|
"widgetDisplayType": "Text",
|
||||||
|
"validationRegex": null
|
||||||
|
},
|
||||||
|
"parameterDataType": "String"
|
||||||
|
},
|
||||||
|
"widgetInfo": {
|
||||||
|
"widgetType": "text",
|
||||||
|
"defaultValue": "{{$dev_schema}}",
|
||||||
|
"label": null,
|
||||||
|
"name": "schema",
|
||||||
|
"options": {
|
||||||
|
"widgetType": "text",
|
||||||
|
"autoCreated": null,
|
||||||
|
"validationRegex": null
|
||||||
|
}
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
|
|
|
@ -27,8 +27,8 @@ def main():
|
||||||
# In the default template, these parameters are set
|
# In the default template, these parameters are set
|
||||||
# using the 'catalog' and 'schema' presets in databricks.yml.
|
# using the 'catalog' and 'schema' presets in databricks.yml.
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument('--catalog', '-c', required=True)
|
parser.add_argument('--catalog', required=True)
|
||||||
parser.add_argument('--schema', '-s', required=True)
|
parser.add_argument('--schema', required=True)
|
||||||
args, unknown = parser.parse_known_args()
|
args, unknown = parser.parse_known_args()
|
||||||
spark = get_spark()
|
spark = get_spark()
|
||||||
spark.sql(f"USE {args.catalog}.{args.schema}")
|
spark.sql(f"USE {args.catalog}.{args.schema}")
|
||||||
|
|
Loading…
Reference in New Issue