From aa36aee15995d1fb98ac6eb6889679479ed91bdd Mon Sep 17 00:00:00 2001 From: "Lennart Kats (databricks)" Date: Tue, 4 Jun 2024 10:57:13 +0200 Subject: [PATCH] Make dbt-sql and default-sql templates public (#1463) ## Changes This makes the dbt-sql and default-sql templates public. These templates were previously not listed and marked "experimental" since structured streaming tables were still in gated preview and would result in weird error messages when a workspace wasn't enabled for the preview. This PR also incorporates some of the feedback and learnings for these templates so far. --- cmd/bundle/init.go | 4 +--- cmd/bundle/init_test.go | 4 ++++ libs/template/materialize.go | 16 ++++++++------ .../dbt-sql/databricks_template_schema.json | 2 +- .../dbt_profiles/profiles.yml.tmpl | 21 +++++++++++++------ .../resources/{{.project_name}}_job.yml.tmpl | 16 ++++++++------ .../databricks_template_schema.json | 2 +- .../src/orders_daily.sql.tmpl | 18 +++++++++++----- .../{{.project_name}}/src/orders_raw.sql.tmpl | 5 ++++- 9 files changed, 59 insertions(+), 29 deletions(-) diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index 6845ab67..c8c59c14 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -38,12 +38,10 @@ var nativeTemplates = []nativeTemplate{ { name: "default-sql", description: "The default SQL template for .sql files that run with Databricks SQL", - hidden: true, }, { name: "dbt-sql", - description: "The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", - hidden: true, + description: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)", }, { name: "mlops-stacks", diff --git a/cmd/bundle/init_test.go b/cmd/bundle/init_test.go index aa899159..475b2e14 100644 --- a/cmd/bundle/init_test.go +++ b/cmd/bundle/init_test.go @@ -30,6 +30,8 @@ func TestBundleInitRepoName(t *testing.T) { func TestNativeTemplateOptions(t *testing.T) { expected := 
[]cmdio.Tuple{ {Name: "default-python", Id: "The default Python template for Notebooks / Delta Live Tables / Workflows"}, + {Name: "default-sql", Id: "The default SQL template for .sql files that run with Databricks SQL"}, + {Name: "dbt-sql", Id: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)"}, {Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"}, {Name: "custom...", Id: "Bring your own template"}, } @@ -38,6 +40,8 @@ func TestNativeTemplateOptions(t *testing.T) { func TestNativeTemplateHelpDescriptions(t *testing.T) { expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows +- default-sql: The default SQL template for .sql files that run with Databricks SQL +- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks) - mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)` assert.Equal(t, expected, nativeTemplateHelpDescriptions()) } diff --git a/libs/template/materialize.go b/libs/template/materialize.go index 04f4c8f0..d824bf38 100644 --- a/libs/template/materialize.go +++ b/libs/template/materialize.go @@ -54,12 +54,6 @@ func Materialize(ctx context.Context, configFilePath, templateRoot, outputDir st return err } - // Print welcome message - welcome := config.schema.WelcomeMessage - if welcome != "" { - cmdio.LogString(ctx, welcome) - } - // Read and assign config values from file if configFilePath != "" { err = config.assignValuesFromFile(configFilePath) @@ -73,6 +67,16 @@ func Materialize(ctx context.Context, configFilePath, templateRoot, outputDir st return err } + // Print welcome message + welcome := config.schema.WelcomeMessage + if welcome != "" { + welcome, err = r.executeTemplate(welcome) + if err != nil { + return err + } + cmdio.LogString(ctx, welcome) + } + // Prompt user for any missing config values. 
Assign default values if // terminal is not TTY err = config.promptOrAssignDefaultValues(r) diff --git a/libs/template/templates/dbt-sql/databricks_template_schema.json b/libs/template/templates/dbt-sql/databricks_template_schema.json index 7b39f618..7fc35352 100644 --- a/libs/template/templates/dbt-sql/databricks_template_schema.json +++ b/libs/template/templates/dbt-sql/databricks_template_schema.json @@ -1,5 +1,5 @@ { - "welcome_message": "\nWelcome to the (EXPERIMENTAL) dbt template for Databricks Asset Bundles!", + "welcome_message": "\nWelcome to the dbt template for Databricks Asset Bundles!\n\nWorkspace selected based on your current profile (see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change this).\nworkspace_host: {{workspace_host}}", "properties": { "project_name": { "type": "string", diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_profiles/profiles.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_profiles/profiles.yml.tmpl index d29bd55c..cce80f8d 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_profiles/profiles.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/dbt_profiles/profiles.yml.tmpl @@ -3,26 +3,35 @@ {{- $catalog = "\"\" # workspace default"}} {{- end}} # This file defines dbt profiles for deployed dbt jobs. -# Note that for local development you should create your own, local profile. -# (see README.md). my_dbt_project: target: dev # default target outputs: - dev: + # Doing local development with the dbt CLI? + # Then you should create your own profile in your .dbt/profiles.yml using 'dbt init' + # (See README.md) + + # The default target when deployed with the Databricks CLI + # N.B. 
when you use dbt from the command line, it uses the profile from .dbt/profiles.yml + dev: type: databricks method: http catalog: {{$catalog}} +{{- if (regexp "^yes").MatchString .personal_schemas}} schema: "{{"{{"}} var('dev_schema') {{"}}"}}" +{{- else}} + schema: "{{.shared_schema}}" +{{- end}} http_path: {{.http_path}} # The workspace host / token are provided by Databricks - # see databricks.yml for the host used for 'dev' + # see databricks.yml for the workspace host used for 'dev' host: "{{"{{"}} env_var('DBT_HOST') {{"}}"}}" token: "{{"{{"}} env_var('DBT_ACCESS_TOKEN') {{"}}"}}" - prod: + # The production target when deployed with the Databricks CLI + prod: type: databricks method: http catalog: {{$catalog}} @@ -31,6 +40,6 @@ my_dbt_project: http_path: {{.http_path}} # The workspace host / token are provided by Databricks - # see databricks.yml for the host used for 'dev' + # see databricks.yml for the workspace host used for 'prod' host: "{{"{{"}} env_var('DBT_HOST') {{"}}"}}" token: "{{"{{"}} env_var('DBT_ACCESS_TOKEN') {{"}}"}}" diff --git a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl index 688c23b9..acf1aa48 100644 --- a/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl +++ b/libs/template/templates/dbt-sql/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl @@ -12,10 +12,6 @@ resources: on_failure: - {{user_name}} -{{- $dev_schema := .shared_schema }} -{{- if (regexp "^yes").MatchString .personal_schemas}} -{{- $dev_schema = "${workspace.current_user.short_name}"}} -{{- end}} tasks: - task_key: dbt @@ -25,9 +21,17 @@ resources: # The default schema, catalog, etc. 
are defined in ../dbt_profiles/profiles.yml profiles_directory: dbt_profiles/ commands: +{{- if (regexp "^yes").MatchString .personal_schemas}} + # The dbt commands to run (see also dbt_profiles/profiles.yml; dev_schema is used in the dev profile) - 'dbt deps --target=${bundle.target}' - - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: {{$dev_schema}} }"' - - 'dbt run --target=${bundle.target} --vars "{ dev_schema: {{$dev_schema}} }"' + - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' + - 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"' +{{- else}} + # The dbt commands to run (see also the dev/prod profiles in dbt_profiles/profiles.yml) + - 'dbt deps --target=${bundle.target}' + - 'dbt seed --target=${bundle.target}' + - 'dbt run --target=${bundle.target}' +{{- end}} libraries: - pypi: diff --git a/libs/template/templates/default-sql/databricks_template_schema.json b/libs/template/templates/default-sql/databricks_template_schema.json index b7a42e19..aacd6a0a 100644 --- a/libs/template/templates/default-sql/databricks_template_schema.json +++ b/libs/template/templates/default-sql/databricks_template_schema.json @@ -1,5 +1,5 @@ { - "welcome_message": "\nWelcome to the (EXPERIMENTAL) default SQL template for Databricks Asset Bundles!", + "welcome_message": "\nWelcome to the default SQL template for Databricks Asset Bundles!\n\nWorkspace selected based on your current profile (see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change this).\nworkspace_host: {{workspace_host}}", "properties": { "project_name": { "type": "string", diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl index 76ecadd3..870fe9c0 100644 --- 
a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_daily.sql.tmpl @@ -1,14 +1,22 @@ -- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql_job.yml) -{{- /* We can't use a materialized view here since they don't support 'create or refresh yet.*/}} +{{- /* We can't use a materialized view here since they don't support 'create or refresh' yet.*/}} + +USE CATALOG {{"{{"}}catalog{{"}}"}}; +USE {{"{{"}}schema{{"}}"}}; CREATE OR REPLACE VIEW - IDENTIFIER(CONCAT({{"{{"}}catalog{{"}}"}}, '.', {{"{{"}}schema{{"}}"}}, '.', 'orders_daily')) + orders_daily AS SELECT order_date, count(*) AS number_of_orders FROM - IDENTIFIER(CONCAT({{"{{"}}catalog{{"}}"}}, '.', {{"{{"}}schema{{"}}"}}, '.', 'orders_raw')) + orders_raw --- During development, only process a smaller range of data +WHERE if( + {{"{{"}}bundle_target{{"}}"}} != "prod", + + -- During development, only process a smaller range of data + order_date >= '2019-08-01' AND order_date < '2019-09-01', + true +) GROUP BY order_date diff --git a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl index 96769062..d5891895 100644 --- a/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl +++ b/libs/template/templates/default-sql/template/{{.project_name}}/src/orders_raw.sql.tmpl @@ -3,8 +3,11 @@ -- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/ -- See also https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-streaming-table.html +USE CATALOG {{"{{"}}catalog{{"}}"}}; +USE {{"{{"}}schema{{"}}"}}; + CREATE OR REFRESH STREAMING TABLE -
IDENTIFIER(CONCAT({{"{{"}}catalog{{"}}"}}, '.', {{"{{"}}schema{{"}}"}}, '.', 'orders_raw')) + orders_raw AS SELECT customer_name, DATE(TIMESTAMP(FROM_UNIXTIME(TRY_CAST(order_datetime AS BIGINT)))) AS order_date,