presets-catalog-schema-as-params
# Your branch is ahead of 'origin/presets-catalog-schema-as-params' by 67 commits.
#   (use "git push" to publish your local commits)
#
# Changes to be committed:
#   modified: dbt-sql/databricks_template_schema.json
#   modified: default-python/databricks_template_schema.json
#   modified: default-python/template/{{.project_name}}/databricks.yml.tmpl
#   modified: default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl
#   modified: default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl
#   modified: default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl
#   modified: default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl
#   modified: default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl
#   modified: default-sql/databricks_template_schema.json
#
# Untracked files:
#   ../../../.cursorrules
#   ../../../bundle/config/resources/:tmp:tmp.py
#   ../../../delme.py
#   ../../../pr-cache-current-user-me
#   ../../../pr-cleanup-warnings.md
#   ../../../pr-contrib-templates.md
#   ../../../pr-cp-diag-ids-for-all.md
#   ../../../pr-cp-serverless-templates.md
#   ../../../pr-presets-catalog-schema-using-params.md
#   ../../../pr-update-sync-command-help.md
#
Revert template changes for now
2024-12-20 16:59:18 +01:00 · 2024-12-20 16:59:18 +01:00 · be08585fb7
parent 8c2eaaba43
commit be08585fb7
9 changed files with 41 additions and 216 deletions
--- a/libs/template/templates/dbt-sql/databricks_template_schema.json
+++ b/libs/template/templates/dbt-sql/databricks_template_schema.json
@@ -45,7 +45,7 @@
            "default": "default",
            "pattern": "^\\w+$",
            "pattern_match_failure_message": "Invalid schema name.",
-            "description": "\nPlease provide a default schema during development.\ndefault_schema",
+            "description": "\nPlease provide an initial schema during development.\ndefault_schema",
            "order": 5
        }
    },
--- a/libs/template/templates/default-python/databricks_template_schema.json
+++ b/libs/template/templates/default-python/databricks_template_schema.json
@@ -4,7 +4,7 @@
        "project_name": {
            "type": "string",
            "default": "my_project",
-            "description": "\nPlease provide a unique name for this project.\nproject_name",
+            "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project",
            "order": 1,
            "pattern": "^[A-Za-z0-9_]+$",
            "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores."
@@ -13,55 +13,23 @@
            "type": "string",
            "default": "yes",
            "enum": ["yes", "no"],
-            "description": "\nWould you like to include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'?",
+            "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'",
            "order": 2
        },
        "include_dlt": {
            "type": "string",
            "default": "yes",
            "enum": ["yes", "no"],
-            "description": "Would you like to include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'?",
+            "description": "Include a stub (sample) Delta Live Tables pipeline in '{{.project_name}}{{path_separator}}src'",
            "order": 3
        },
        "include_python": {
            "type": "string",
            "default": "yes",
            "enum": ["yes", "no"],
-            "description": "Would you like to include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'?",
+            "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
            "order": 4
-        },
-        "default_catalog": {
-            "type": "string",
-            "default": "{{default_catalog}}",
-            "pattern": "^\\w*$",
-            "pattern_match_failure_message": "Invalid catalog name.",
-            "description": "\nPlease provide an initial catalog{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}.\ndefault_catalog",
-            "order": 5
-        },
-        "personal_schemas": {
-            "type": "string",
-            "description": "\nWould you like to use a personal schema for each user working on this project? (e.g., 'catalog.{{short_name}}')\npersonal_schemas",
-            "enum": [
-                "yes, use a schema based on the current user name during development",
-                "no, use a shared schema during development"
-            ],
-            "order": 6
-        },
-        "shared_schema": {
-            "skip_prompt_if": {
-                "properties": {
-                    "personal_schemas": {
-                        "const": "yes, use a schema based on the current user name during development"
-                    }
-                }
-            },
-            "type": "string",
-            "default": "default",
-            "pattern": "^\\w+$",
-            "pattern_match_failure_message": "Invalid schema name.",
-            "description": "\nPlease provide default schema during development.\ndefault_schema",
-            "order": 7
        }
    },
-    "success_message": "\nWorkspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml').\nworkspace_host: {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
+    "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
 }
--- a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl
@@ -6,13 +6,6 @@ bundle:
 include:
  - resources/*.yml

-{{- $dev_schema := .shared_schema }}
-{{- $prod_schema := .shared_schema }}
-{{- if (regexp "^yes").MatchString .personal_schemas}}
-  {{- $dev_schema = "${workspace.current_user.short_name}"}}
-  {{- $prod_schema = "default"}}
-{{- end}}
-
 targets:
  dev:
    # The default target uses 'mode: development' to create a development copy.
@@ -23,9 +16,6 @@ targets:
    default: true
    workspace:
      host: {{workspace_host}}
-    presets:
-      catalog: {{.default_catalog}}
-      schema: {{$dev_schema}}

  prod:
    mode: production
@@ -36,6 +26,5 @@ targets:
    permissions:
      - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
        level: CAN_MANAGE
-    presets:
-      catalog: {{.default_catalog}}
-      schema: {{$prod_schema}}
+    run_as:
+      {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
--- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.job.yml.tmpl
@@ -16,12 +16,16 @@ resources:
          interval: 1
          unit: DAYS

-      {{if not is_service_principal -}}
+      {{- if not is_service_principal}}
+
      email_notifications:
        on_failure:
          - {{user_name}}

+      {{else}}
+
      {{end -}}
+
      tasks:
        {{- if eq .include_notebook "yes" }}
        - task_key: notebook_task
--- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}.pipeline.yml.tmpl
@@ -3,6 +3,13 @@ resources:
  pipelines:
    {{.project_name}}_pipeline:
      name: {{.project_name}}_pipeline
+      {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}}
+      ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
+      # catalog: catalog_name
+      {{- else}}
+      catalog: {{default_catalog}}
+      {{- end}}
+      target: {{.project_name}}_${bundle.target}
      libraries:
        - notebook:
            path: ../src/dlt_pipeline.ipynb
--- a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl
@@ -27,25 +27,15 @@
   },
   "outputs": [],
   "source": [
-    {{- if (eq .include_python "yes") }}
+   {{- if (eq .include_python "yes") }}
    "import sys\n",
    "sys.path.append('../src')\n",
    "from {{.project_name}} import main\n",
    "\n",
-    {{- /* We can use the short form here without 'dbutils.text()' since the widgets are defined in the metadata below. */}}
-    "catalog = dbutils.widgets.get('catalog')\n",
-    "schema = dbutils.widgets.get('schema')\n",
-    "spark.sql(f'USE {catalog}.{schema}')\n",
-    "\n",
-    "spark.sql('SELECT * FROM example').show(10)"
-    {{- else}}
-    "# Load default catalog and schema as widget and set their values as the default catalog / schema\n",
-    "catalog = dbutils.widgets.get('catalog')\n",
-    "schema = dbutils.widgets.get('schema')\n",
-    "spark.sql(f'USE {catalog}.{schema}')\n",
-    "\n",
-    "spark.sql('SELECT * FROM example').show(10)"
-   {{- end}}
+    "main.get_taxis(spark).show(10)"
+   {{else}}
+    "spark.range(10)"
+   {{end -}}
   ]
  }
 ],
@@ -56,63 +46,8 @@
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
-   "notebookName": "exploration",
-   "widgets": {
-    "catalog": {
-     "currentValue": "{{.default_catalog}}",
-     "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f1234",
-     "typedWidgetInfo": {
-      "autoCreated": false,
-      "defaultValue": "{{.default_catalog}}",
-      "label": null,
-      "name": "catalog",
-      "options": {
-       "widgetDisplayType": "Text",
-       "validationRegex": null
-      },
-      "parameterDataType": "String"
-     },
-     "widgetInfo": {
-      "widgetType": "text",
-      "defaultValue": "{{.default_catalog}}",
-      "label": null,
-      "name": "catalog",
-      "options": {
-       "widgetType": "text",
-       "autoCreated": null,
-       "validationRegex": null
-      }
-     }
-    },
-{{- $dev_schema := .shared_schema }}
-{{- if (regexp "^yes").MatchString .personal_schemas}}
-  {{- $dev_schema = "{{short_name}}"}}
-{{- end}}
-    "schema": {
-     "currentValue": "{{$dev_schema}}",
-     "nuid": "c47e96d8-5751-4c8a-9d6b-5c6c7c3f5678",
-     "typedWidgetInfo": {
-      "autoCreated": false,
-      "defaultValue": "{{$dev_schema}}",
-      "label": null,
-      "name": "schema",
-      "options": {
-       "widgetDisplayType": "Text",
-       "validationRegex": null
-      },
-      "parameterDataType": "String"
-     },
-     "widgetInfo": {
-      "widgetType": "text",
-      "defaultValue": "{{$dev_schema}}",
-      "label": null,
-      "name": "schema",
-      "options": {
-       "widgetType": "text",
-       "autoCreated": null,
-       "validationRegex": null
-      }
-     }
+   "notebookName": "ipynb-notebook",
+   "widgets": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
--- a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl
@@ -23,11 +23,8 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# Load default catalog and schema as widget and set their values as the default catalog / schema\n",
-    {{- /* We can use the short form here without 'dbutils.text()' since the widgets are defined in the metadata below. */}}
-    "catalog = dbutils.widgets.get('catalog')\n",
-    "schema = dbutils.widgets.get('schema')\n",
-    "spark.sql(f'USE {catalog}.{schema}')"
+    "%load_ext autoreload\n",
+    "%autoreload 2"
   ]
  },
  {
@@ -50,9 +47,9 @@
   {{- if (eq .include_python "yes") }}
    "from {{.project_name}} import main\n",
    "\n",
-    "main.create_example_table()"
+    "main.get_taxis(spark).show(10)"
   {{else}}
-    "spark.sql("CREATE OR REPLACE TABLE example AS SELECT 'example table' AS text_column")"
+    "spark.range(10)"
   {{end -}}
   ]
  }
@@ -65,64 +62,7 @@
    "pythonIndentUnit": 2
   },
   "notebookName": "notebook",
-   "widgets": {
-    "catalog": {
-     "currentValue": "{{.default_catalog}}",
-     "nuid": "3965fc9c-8080-45b1-bee3-f75cef7685b4",
-     "typedWidgetInfo": {
-      "autoCreated": false,
-      "defaultValue": "{{.default_catalog}}",
-      "label": null,
-      "name": "catalog",
-      "options": {
-       "widgetDisplayType": "Text",
-       "validationRegex": null
-      },
-      "parameterDataType": "String"
-     },
-     "widgetInfo": {
-      "widgetType": "text",
-      "defaultValue": "{{.default_catalog}}",
-      "label": null,
-      "name": "catalog",
-      "options": {
-       "widgetType": "text",
-       "autoCreated": null,
-       "validationRegex": null
-      }
-     }
-    },
-{{- $dev_schema := .shared_schema }}
-{{- if (regexp "^yes").MatchString .personal_schemas}}
-  {{- $dev_schema = "{{short_name}}"}}
-{{- end}}
-    "schema": {
-     "currentValue": "{{$dev_schema}}",
-     "nuid": "6ec0d70f-39bf-4859-a510-02c3e3d59bff",
-     "typedWidgetInfo": {
-      "autoCreated": false,
-      "defaultValue": "{{$dev_schema}}",
-      "label": null,
-      "name": "schema",
-      "options": {
-       "widgetDisplayType": "Text",
-       "validationRegex": null
-      },
-      "parameterDataType": "String"
-     },
-     "widgetInfo": {
-      "widgetType": "text",
-      "defaultValue": "{{$dev_schema}}",
-      "label": null,
-      "name": "schema",
-      "options": {
-       "widgetType": "text",
-       "autoCreated": null,
-       "validationRegex": null
-      }
-     }
-    }
-   }
+   "widgets": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
--- a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl
@@ -1,39 +1,21 @@
 from pyspark.sql import SparkSession, DataFrame
-import argparse

+def get_taxis(spark: SparkSession) -> DataFrame:
+  return spark.read.table("samples.nyctaxi.trips")
+
+
+# Create a new Databricks Connect session. If this fails,
+# check that you have configured Databricks Connect correctly.
+# See https://docs.databricks.com/dev-tools/databricks-connect.html.
 def get_spark() -> SparkSession:
-  """
-  Create a new Databricks Connect session. If this fails,
-  check that you have configured Databricks Connect correctly.
-  See https://docs.databricks.com/dev-tools/databricks-connect.html.
-  """
  try:
    from databricks.connect import DatabricksSession
    return DatabricksSession.builder.getOrCreate()
  except ImportError:
    return SparkSession.builder.getOrCreate()

-def get_taxis(spark: SparkSession) -> DataFrame:
-  return spark.read.table("samples.nyctaxi.trips")
-
-def create_example_table():
-  """
-  Create a table called 'example' in the default catalog and schema.
-  """
-  get_spark().sql("CREATE OR REPLACE TABLE example AS SELECT 'example table' AS text_column")
-
 def main():
-  # Set the catalog and schema for the current session.
-  # In the default template, these parameters are set
-  # using the 'catalog' and 'schema' presets in databricks.yml.
-  parser = argparse.ArgumentParser()
-  parser.add_argument('--catalog', required=True)
-  parser.add_argument('--schema', required=True)
-  args, unknown = parser.parse_known_args()
-  spark = get_spark()
-  spark.sql(f"USE {args.catalog}.{args.schema}")
-
-  create_example_table()
+  get_taxis(get_spark()).show(5)

 if __name__ == '__main__':
  main()
--- a/libs/template/templates/default-sql/databricks_template_schema.json
+++ b/libs/template/templates/default-sql/databricks_template_schema.json
@@ -45,7 +45,7 @@
            "default": "default",
            "pattern": "^\\w+$",
            "pattern_match_failure_message": "Invalid schema name.",
-            "description": "\nPlease provide a default schema during development.\ndefault_schema",
+            "description": "\nPlease provide an initial schema during development.\ndefault_schema",
            "order": 5
        }
    },