From be08585fb76454bfcab97612576889e54497a64a Mon Sep 17 00:00:00 2001
From: Lennart Kats
Date: Fri, 20 Dec 2024 16:59:18 +0100
Subject: [PATCH] presets-catalog-schema-as-params

Revert template changes for now
---
 .../dbt-sql/databricks_template_schema.json   |  2 +-
 .../databricks_template_schema.json           | 42 ++--------
 .../{{.project_name}}/databricks.yml.tmpl     | 15 +---
 .../resources/{{.project_name}}.job.yml.tmpl  |  6 +-
 .../{{.project_name}}.pipeline.yml.tmpl       |  7 ++
 .../scratch/exploration.ipynb.tmpl            | 79 ++-----------------
 .../{{.project_name}}/src/notebook.ipynb.tmpl | 70 ++--------------
 .../src/{{.project_name}}/main.py.tmpl        | 34 ++------
 .../databricks_template_schema.json           |  2 +-
 9 files changed, 41 insertions(+), 216 deletions(-)

diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json
index bb512153..cccf145d 100644
--- a/libs/template/templates/dbt-sql/databricks_template_schema.json
+++ b/libs/template/templates/dbt-sql/databricks_template_schema.json
@@ -45,7 +45,7 @@
       "default": "default",
       "pattern": "^\\w+$",
       "pattern_match_failure_message": "Invalid schema name.",
-      "description": "\nPlease provide a default schema during development.\ndefault_schema",
+      "description": "\nPlease provide an initial schema during development.\ndefault_schema",
       "order": 5
     }
   },
diff --git a/libs/template/templates/default-python/databricks_template_schema.json b/libs/template/templates/default-python/databricks_template_schema.json
index 461aaa0c..d53bad91 100644
--- a/libs/template/templates/default-python/databricks_template_schema.json
+++ b/libs/template/templates/default-python/databricks_template_schema.json
@@ -4,7 +4,7 @@
     "project_name": {
       "type": "string",
       "default": "my_project",
-      "description": "\nPlease provide a unique name for this project.\nproject_name",
+      "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project",
       "order": 1,
       "pattern": "^[A-Za-z0-9_]+$",
       "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores."
@@ -13,55 +13,23 @@
       "type": "string",
       "default": "yes",
       "enum": ["yes", "no"],
-      "description": "\nWould you like to include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'?",
+      "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'",
       "order": 2
     },
     "include_dlt": {
       "type": "string",
       "default": "yes",
       "enum": ["yes", "no"],
-      "description": "Would you like to include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'?",
+      "description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'",
       "order": 3
     },
     "include_python": {
       "type": "string",
       "default": "yes",
       "enum": ["yes", "no"],
-      "description": "Would you like to include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'?",
+      "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
       "order": 4
-    },
-    "default_catalog": {
-      "type": "string",
-      "default": "{{default_catalog}}",
-      "pattern": "^\\w*$",
-      "pattern_match_failure_message": "Invalid catalog name.",
-      "description": "\nPlease provide an initial catalog{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}.\ndefault_catalog",
-      "order": 5
-    },
-    "personal_schemas": {
-      "type": "string",
-      "description": "\nWould you like to use a personal schema for each user working on this project? (e.g., 'catalog.{{short_name}}')\npersonal_schemas",
-      "enum": [
-        "yes, use a schema based on the current user name during development",
-        "no, use a shared schema during development"
-      ],
-      "order": 6
-    },
-    "shared_schema": {
-      "skip_prompt_if": {
-        "properties": {
-          "personal_schemas": {
-            "const": "yes, use a schema based on the current user name during development"
-          }
-        }
-      },
-      "type": "string",
-      "default": "default",
-      "pattern": "^\\w+$",
-      "pattern_match_failure_message": "Invalid schema name.",
-      "description": "\nPlease provide default schema during development.\ndefault_schema",
-      "order": 7
     }
   },
-  "success_message": "\nWorkspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml').\nworkspace_host: {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
+  "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
 }
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl
index 421fe501..c42b822a 100644
--- a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl
@@ -6,13 +6,6 @@ bundle:
 include:
   - resources/*.yml
 
-{{- $dev_schema := .shared_schema }}
-{{- $prod_schema := .shared_schema }}
-{{- if (regexp "^yes").MatchString .personal_schemas}}
-  {{- $dev_schema = "${workspace.current_user.short_name}"}}
-  {{- $prod_schema = "default"}}
-{{- end}}
-
 targets:
   dev:
     # The default target uses 'mode: development' to create a development copy.
@@ -23,9 +16,6 @@ targets:
     default: true
     workspace:
       host: {{workspace_host}}
-    presets:
-      catalog: {{.default_catalog}}
-      schema: {{$dev_schema}}
 
   prod:
     mode: production
@@ -36,6 +26,5 @@ targets:
     permissions:
       - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
         level: CAN_MANAGE
-    presets:
-      catalog: {{.default_catalog}}
-      schema: {{$prod_schema}}
+    run_as:
+      {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl
index 0ea69a75..5211e389 100644
--- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl
@@ -16,12 +16,16 @@ resources:
           interval: 1
           unit: DAYS
 
-      {{if not is_service_principal -}}
+      {{- if not is_service_principal}}
+
       email_notifications:
         on_failure:
           - {{user_name}}
+
       {{else}}
+
       {{end -}}
+
       tasks:
         {{- if eq .include_notebook "yes" }}
         - task_key: notebook_task
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl
index c3f94cb1..50f11fe2 100644
--- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl
@@ -3,6 +3,13 @@ resources:
   pipelines:
     {{.project_name}}_pipeline:
       name: {{.project_name}}_pipeline
+      {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}}
+      ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
+      # catalog: catalog_name
+      {{- else}}
+      catalog: {{default_catalog}}
+      {{- end}}
+      target: {{.project_name}}_${bundle.target}
       libraries:
         - notebook:
             path: ../src/dlt_pipeline.ipynb
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl
index adb353c5..42164dff 100644
--- a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl
@@ -27,25 +27,15 @@
    },
    "outputs": [],
    "source": [
-    {{- if (eq .include_python "yes") }}
+     {{- if (eq .include_python "yes") }}
"import sys\n", "sys.path.append('../src')\n", "from {{.project_name}} import main\n", "\n", - {{- /* We can use the short form here without 'dbutils.text()' since the widgets are defined in the metadata below. */}} - "catalog = dbutils.widgets.get('catalog')\n", - "schema = dbutils.widgets.get('schema')\n", - "spark.sql(f'USE {catalog}.{schema}')\n", - "\n", - "spark.sql('SELECT * FROM example').show(10)" - {{- else}} - "# Load default catalog and schema as widget and set their values as the default catalog / schema\n", - "catalog = dbutils.widgets.get('catalog')\n", - "schema = dbutils.widgets.get('schema')\n", - "spark.sql(f'USE {catalog}.{schema}')\n", - "\n", - "spark.sql('SELECT * FROM example').show(10)" - {{- end}} + "main.get_taxis(spark).show(10)" + {{else}} + "spark.range(10)" + {{end -}} ] } ], @@ -56,63 +46,8 @@ "notebookMetadata": { "pythonIndentUnit": 2 }, - "notebookName": "exploration", - "widgets": { - "catalog": { - "currentValue": "{{.default_catalog}}", - "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f1234", - "typedWidgetInfo": { - "autoCreated": false, - "defaultValue": "{{.default_catalog}}", - "label": null, - "name": "catalog", - "options": { - "widgetDisplayType": "Text", - "validationRegex": null - }, - "parameterDataType": "String" - }, - "widgetInfo": { - "widgetType": "text", - "defaultValue": "{{.default_catalog}}", - "label": null, - "name": "catalog", - "options": { - "widgetType": "text", - "autoCreated": null, - "validationRegex": null - } - } - }, -{{- $dev_schema := .shared_schema }} -{{- if (regexp "^yes").MatchString .personal_schemas}} - {{- $dev_schema = "{{short_name}}"}} -{{- end}} - "schema": { - "currentValue": "{{$dev_schema}}", - "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f5678", - "typedWidgetInfo": { - "autoCreated": false, - "defaultValue": "{{$dev_schema}}", - "label": null, - "name": "schema", - "options": { - "widgetDisplayType": "Text", - "validationRegex": null - }, - "parameterDataType": "String" - }, - "widgetInfo": { - "widgetType": "text", - "defaultValue": "{{$dev_schema}}", - "label": null, - "name": "schema", - "options": { - "widgetType": "text", - "autoCreated": null, - "validationRegex": null - } - } + "notebookName": "ipynb-notebook", + "widgets": {} }, "kernelspec": { "display_name": "Python 3", diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl index 0924e60f..6782a053 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -23,11 +23,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Load default catalog and schema as widget and set their values as the default catalog / schema\n", - {{- /* We can use the short form here without 'dbutils.text()' since the widgets are defined in the metadata below. 
-    "catalog = dbutils.widgets.get('catalog')\n",
-    "schema = dbutils.widgets.get('schema')\n",
-    "spark.sql(f'USE {catalog}.{schema}')"
+    "%load_ext autoreload\n",
+    "%autoreload 2"
    ]
   },
   {
@@ -50,9 +47,9 @@
     {{- if (eq .include_python "yes") }}
     "from {{.project_name}} import main\n",
     "\n",
-    "main.create_example_table()"
+    "main.get_taxis(spark).show(10)"
     {{else}}
-    "spark.sql("CREATE OR REPLACE TABLE example AS SELECT 'example table' AS text_column")"
+    "spark.range(10)"
     {{end -}}
    ]
   }
@@ -65,64 +62,7 @@
    "pythonIndentUnit": 2
   },
   "notebookName": "notebook",
-  "widgets": {
-   "catalog": {
-    "currentValue": "{{.default_catalog}}",
-    "nuid": "3965fc9c-8080-45b1-bee3-f75cef7685b4",
-    "typedWidgetInfo": {
-     "autoCreated": false,
-     "defaultValue": "{{.default_catalog}}",
-     "label": null,
-     "name": "catalog",
-     "options": {
-      "widgetDisplayType": "Text",
-      "validationRegex": null
-     },
-     "parameterDataType": "String"
-    },
-    "widgetInfo": {
-     "widgetType": "text",
-     "defaultValue": "{{.default_catalog}}",
-     "label": null,
-     "name": "catalog",
-     "options": {
-      "widgetType": "text",
-      "autoCreated": null,
-      "validationRegex": null
-     }
-    }
-   },
-{{- $dev_schema := .shared_schema }}
-{{- if (regexp "^yes").MatchString .personal_schemas}}
-  {{- $dev_schema = "{{short_name}}"}}
-{{- end}}
-   "schema": {
-    "currentValue": "{{$dev_schema}}",
-    "nuid": "6ec0d70f-39bf-4859-a510-02c3e3d59bff",
-    "typedWidgetInfo": {
-     "autoCreated": false,
-     "defaultValue": "{{$dev_schema}}",
-     "label": null,
-     "name": "schema",
-     "options": {
-      "widgetDisplayType": "Text",
-      "validationRegex": null
-     },
-     "parameterDataType": "String"
-    },
-    "widgetInfo": {
-     "widgetType": "text",
-     "defaultValue": "{{$dev_schema}}",
-     "label": null,
-     "name": "schema",
-     "options": {
-      "widgetType": "text",
-      "autoCreated": null,
-      "validationRegex": null
-     }
-    }
-   }
-  }
+  "widgets": {}
  },
 "kernelspec": {
  "display_name": "Python 3",
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl
index e79920a9..c514c6dc 100644
--- a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl
@@ -1,39 +1,21 @@
 from pyspark.sql import SparkSession, DataFrame
-import argparse
 
+def get_taxis(spark: SparkSession) -> DataFrame:
+  return spark.read.table("samples.nyctaxi.trips")
+
+
+# Create a new Databricks Connect session. If this fails,
+# check that you have configured Databricks Connect correctly.
+# See https://docs.databricks.com/dev-tools/databricks-connect.html.
 def get_spark() -> SparkSession:
-  """
-  Create a new Databricks Connect session. If this fails,
-  check that you have configured Databricks Connect correctly.
-  See https://docs.databricks.com/dev-tools/databricks-connect.html.
-  """
   try:
     from databricks.connect import DatabricksSession
     return DatabricksSession.builder.getOrCreate()
   except ImportError:
     return SparkSession.builder.getOrCreate()
 
-def get_taxis(spark: SparkSession) -> DataFrame:
-  return spark.read.table("samples.nyctaxi.trips")
-
-def create_example_table():
-  """
-  Create a table called 'example' in the default catalog and schema.
-  """
-  get_spark().sql("CREATE OR REPLACE TABLE example AS SELECT 'example table' AS text_column")
-
 def main():
-  # Set the catalog and schema for the current session.
-  # In the default template, these parameters are set
-  # using the 'catalog' and 'schema' presets in databricks.yml.
-  parser = argparse.ArgumentParser()
-  parser.add_argument('--catalog', required=True)
-  parser.add_argument('--schema', required=True)
-  args, unknown = parser.parse_known_args()
-  spark = get_spark()
-  spark.sql(f"USE {args.catalog}.{args.schema}")
-
-  create_example_table()
+  get_taxis(get_spark()).show(5)
 
 if __name__ == '__main__':
   main()
diff --git a/libs/template/templates/default-sql/databricks_template_schema.json b/libs/template/templates/default-sql/databricks_template_schema.json
index 81fa7e2d..113cbef6 100644
--- a/libs/template/templates/default-sql/databricks_template_schema.json
+++ b/libs/template/templates/default-sql/databricks_template_schema.json
@@ -45,7 +45,7 @@
       "default": "default",
       "pattern": "^\\w+$",
       "pattern_match_failure_message": "Invalid schema name.",
-      "description": "\nPlease provide a default schema during development.\ndefault_schema",
+      "description": "\nPlease provide an initial schema during development.\ndefault_schema",
       "order": 5
     }
   },