From 8b9930a49a0ce64973c8bcfecdfe306d2e9d5170 Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Mon, 11 Dec 2023 20:13:14 +0100 Subject: [PATCH] Improve default template (#1046) ## Changes - Tweak strings, documentation in template - Extend requirements-dev.txt with setuptools/wheel for building whl files - Clarify the purpose of the "_job.yml" file for users who are only interested in DLT pipelines (answering a question that came up recently) ## Tests Existing tests exercise this template --- .../databricks_template_schema.json | 6 +-- .../template/{{.project_name}}/.gitignore | 1 - .../template/{{.project_name}}/README.md.tmpl | 2 +- .../{{.project_name}}/databricks.yml.tmpl | 38 +++++++------------ .../requirements-dev.txt.tmpl | 4 ++ .../resources/{{.project_name}}_job.yml.tmpl | 9 ++++- 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/libs/template/templates/default-python/databricks_template_schema.json b/libs/template/templates/default-python/databricks_template_schema.json index 8d5afb57..d53bad91 100644 --- a/libs/template/templates/default-python/databricks_template_schema.json +++ b/libs/template/templates/default-python/databricks_template_schema.json @@ -1,10 +1,10 @@ { - "welcome_message": "\nWelcome to the sample Databricks Asset Bundle template! Please enter the following information to initialize your sample DAB.\n", + "welcome_message": "\nWelcome to the default Python template for Databricks Asset Bundles!", "properties": { "project_name": { "type": "string", "default": "my_project", - "description": "Unique name for this project", + "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project", "order": 1, "pattern": "^[A-Za-z0-9_]+$", "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores." 
@@ -31,5 +31,5 @@ "order": 4 } }, - "success_message": "\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md of your project for further instructions on getting started.\nOr read the documentation on Databricks Asset Bundles at https://docs.databricks.com/dev-tools/bundles/index.html." + "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html." } diff --git a/libs/template/templates/default-python/template/{{.project_name}}/.gitignore b/libs/template/templates/default-python/template/{{.project_name}}/.gitignore index aa87f019..0dab7f49 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/.gitignore +++ b/libs/template/templates/default-python/template/{{.project_name}}/.gitignore @@ -1,4 +1,3 @@ - .databricks/ build/ dist/ diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl index b451d03b..476c1cd6 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -28,7 +28,7 @@ The '{{.project_name}}' project was generated by using the default-python templa $ databricks bundle deploy --target prod ``` -5. To run a job or pipeline, use the "run" comand: +5. 
To run a job or pipeline, use the "run" command: ``` $ databricks bundle run ``` diff --git a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl index 7fbf4da4..7860b32b 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl @@ -7,46 +7,36 @@ include: - resources/*.yml targets: - # The 'dev' target, used for development purposes. - # Whenever a developer deploys using 'dev', they get their own copy. + # The 'dev' target, for development purposes. This target is the default. dev: - # We use 'mode: development' to make sure everything deployed to this target gets a prefix - # like '[dev my_user_name]'. Setting this mode also disables any schedules and - # automatic triggers for jobs and enables the 'development' mode for Delta Live Tables pipelines. + # We use 'mode: development' to indicate this is a personal development copy: + # - Deployed resources get prefixed with '[dev my_user_name]' + # - Any job schedules and triggers are paused by default + # - The 'development' mode is used for Delta Live Tables pipelines mode: development default: true workspace: host: {{workspace_host}} - # Optionally, there could be a 'staging' target here. - # (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/index.html.) + ## Optionally, there could be a 'staging' target here. + ## (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/index.html.) # # staging: - # workspace: - # host: {{workspace_host}} + # workspace: + # host: {{workspace_host}} # The 'prod' target, used for production deployment. 
prod: - # For production deployments, we only have a single copy, so we override the - # workspace.root_path default of - # /Users/${workspace.current_user.userName}/.bundle/${bundle.target}/${bundle.name} - # to a path that is not specific to the current user. - {{- /* - Explaining 'mode: production' isn't as pressing as explaining 'mode: development'. - As we already talked about the other mode above, users can just - look at documentation or ask the assistant about 'mode: production'. - # - # By making use of 'mode: production' we enable strict checks - # to make sure we have correctly configured this target. - */}} + # We use 'mode: production' to indicate this is a production deployment. + # Doing so enables strict verification of the settings below. mode: production workspace: host: {{workspace_host}} + # We only have a single deployment copy for production, so we use a shared path. root_path: /Shared/.bundle/prod/${bundle.name} {{- if not is_service_principal}} run_as: - # This runs as {{user_name}} in production. Alternatively, - # a service principal could be used here using service_principal_name - # (see Databricks documentation). + # This runs as {{user_name}} in production. We could also use a service principal here + # using service_principal_name (see https://docs.databricks.com/dev-tools/bundles/permissions.html). 
user_name: {{user_name}} {{end -}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/requirements-dev.txt.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/requirements-dev.txt.tmpl index 2d4c0f64..6da40321 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/requirements-dev.txt.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/requirements-dev.txt.tmpl @@ -6,6 +6,10 @@ ## pytest is the default package used for testing pytest +## Dependencies for building wheel files +setuptools +wheel + ## databricks-connect can be used to run parts of this project locally. ## See https://docs.databricks.com/dev-tools/databricks-connect.html. ## diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl index 23bdee49..dc79e3a1 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl @@ -1,10 +1,17 @@ -# The main job for {{.project_name}} +# The main job for {{.project_name}}. + +{{- /* Clarify the purpose of this job for DLT-only users. */}} +{{if and (eq .include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}} +# This job runs {{.project_name}}_pipeline on a schedule. +{{end -}} + resources: jobs: {{.project_name}}_job: name: {{.project_name}}_job schedule: + # Run every day at 8:37 AM quartz_cron_expression: '44 37 8 * * ?' timezone_id: Europe/Amsterdam