Make dbt-sql and default-sql templates public (#1463)

## Changes

This makes the dbt-sql and default-sql templates public.

These templates were previously unlisted and marked "experimental" because
structured streaming tables were still in a gated preview and produced
confusing error messages when a workspace wasn't enabled for that preview.

This PR also incorporates some of the feedback and lessons learned from these
templates so far.
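
With the `hidden` flag gone (see the first diff below), both templates now show up when initializing a bundle. A minimal usage sketch, assuming a configured CLI profile; only the template names come from this PR, the rest is the standard `bundle init` flow:

    # Pick a template interactively from the listed options
    databricks bundle init

    # Or initialize one of the newly public templates directly
    databricks bundle init dbt-sql
    databricks bundle init default-sql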

Lennart Kats (databricks) authored on 2024-06-04 10:57:13 +02:00, committed by GitHub
commit aa36aee159 (parent 70fd8ad3d7)
9 changed files with 59 additions and 29 deletions


@@ -38,12 +38,10 @@ var nativeTemplates = []nativeTemplate{
 	{
 		name:        "default-sql",
 		description: "The default SQL template for .sql files that run with Databricks SQL",
-		hidden:      true,
 	},
 	{
 		name:        "dbt-sql",
-		description: "The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)",
-		hidden:      true,
+		description: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)",
 	},
 	{
 		name:        "mlops-stacks",


@@ -30,6 +30,8 @@ func TestBundleInitRepoName(t *testing.T) {
 func TestNativeTemplateOptions(t *testing.T) {
 	expected := []cmdio.Tuple{
 		{Name: "default-python", Id: "The default Python template for Notebooks / Delta Live Tables / Workflows"},
+		{Name: "default-sql", Id: "The default SQL template for .sql files that run with Databricks SQL"},
+		{Name: "dbt-sql", Id: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)"},
 		{Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"},
 		{Name: "custom...", Id: "Bring your own template"},
 	}
@@ -38,6 +40,8 @@ func TestNativeTemplateOptions(t *testing.T) {
 func TestNativeTemplateHelpDescriptions(t *testing.T) {
 	expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows
+- default-sql: The default SQL template for .sql files that run with Databricks SQL
+- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)
 - mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)`
 	assert.Equal(t, expected, nativeTemplateHelpDescriptions())
 }
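
These expectations can be exercised with the standard Go test runner; the package path below is an assumption based on where the `bundle init` command lives, not something stated in this diff:

    go test -run 'TestNativeTemplate' ./cmd/bundle/...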


@@ -54,12 +54,6 @@ func Materialize(ctx context.Context, configFilePath, templateRoot, outputDir st
 		return err
 	}
 
-	// Print welcome message
-	welcome := config.schema.WelcomeMessage
-	if welcome != "" {
-		cmdio.LogString(ctx, welcome)
-	}
-
 	// Read and assign config values from file
 	if configFilePath != "" {
 		err = config.assignValuesFromFile(configFilePath)
@@ -73,6 +67,16 @@ func Materialize(ctx context.Context, configFilePath, templateRoot, outputDir st
 		return err
 	}
 
+	// Print welcome message
+	welcome := config.schema.WelcomeMessage
+	if welcome != "" {
+		welcome, err = r.executeTemplate(welcome)
+		if err != nil {
+			return err
+		}
+		cmdio.LogString(ctx, welcome)
+	}
+
 	// Prompt user for any missing config values. Assign default values if
 	// terminal is not TTY
 	err = config.promptOrAssignDefaultValues(r)
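
The welcome message is now printed after the config values have been read, and it is run through executeTemplate first, so placeholders inside it can resolve. A minimal, self-contained sketch of that idea (not the CLI's actual renderer, which supplies workspace_host through its own template helpers):

    package main

    import (
        "os"
        "text/template"
    )

    func main() {
        welcome := "Welcome to the dbt template for Databricks Asset Bundles!\nworkspace_host: {{.workspace_host}}"

        // Hypothetical value; in the CLI this comes from the resolved workspace configuration.
        values := map[string]string{
            "workspace_host": "https://my-workspace.cloud.databricks.com",
        }

        // Rendering only works once the referenced values are known, which is
        // why the welcome message is printed after config assignment.
        tmpl := template.Must(template.New("welcome").Parse(welcome))
        if err := tmpl.Execute(os.Stdout, values); err != nil {
            panic(err)
        }
    }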


@@ -1,5 +1,5 @@
 {
-    "welcome_message": "\nWelcome to the (EXPERIMENTAL) dbt template for Databricks Asset Bundles!",
+    "welcome_message": "\nWelcome to the dbt template for Databricks Asset Bundles!\n\nWorkspace selected based on your current profile (see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change this).\nworkspace_host: {{workspace_host}}",
     "properties": {
         "project_name": {
             "type": "string",


@@ -3,26 +3,35 @@
 {{- $catalog = "\"\" # workspace default"}}
 {{- end}}
 # This file defines dbt profiles for deployed dbt jobs.
-# Note that for local development you should create your own, local profile.
-# (see README.md).
 my_dbt_project:
   target: dev # default target
   outputs:
-    dev:
+    # Doing local development with the dbt CLI?
+    # Then you should create your own profile in your .dbt/profiles.yml using 'dbt init'
+    # (See README.md)
+
+    # The default target when deployed with the Databricks CLI
+    # N.B. when you use dbt from the command line, it uses the profile from .dbt/profiles.yml
+    dev:
       type: databricks
       method: http
       catalog: {{$catalog}}
+      {{- if (regexp "^yes").MatchString .personal_schemas}}
       schema: "{{"{{"}} var('dev_schema') {{"}}"}}"
+      {{- else}}
+      schema: "{{.shared_schema}}"
+      {{- end}}
       http_path: {{.http_path}}
 
       # The workspace host / token are provided by Databricks
-      # see databricks.yml for the host used for 'dev'
+      # see databricks.yml for the workspace host used for 'dev'
       host: "{{"{{"}} env_var('DBT_HOST') {{"}}"}}"
       token: "{{"{{"}} env_var('DBT_ACCESS_TOKEN') {{"}}"}}"
-    prod:
+
+    # The production target when deployed with the Databricks CLI
+    prod:
       type: databricks
       method: http
       catalog: {{$catalog}}
@@ -31,6 +40,6 @@ my_dbt_project:
       http_path: {{.http_path}}
 
       # The workspace host / token are provided by Databricks
-      # see databricks.yml for the host used for 'dev'
+      # see databricks.yml for the workspace host used for 'prod'
       host: "{{"{{"}} env_var('DBT_HOST') {{"}}"}}"
       token: "{{"{{"}} env_var('DBT_ACCESS_TOKEN') {{"}}"}}"


@@ -12,10 +12,6 @@ resources:
         on_failure:
           - {{user_name}}
 
-      {{- $dev_schema := .shared_schema }}
-      {{- if (regexp "^yes").MatchString .personal_schemas}}
-      {{- $dev_schema = "${workspace.current_user.short_name}"}}
-      {{- end}}
       tasks:
         - task_key: dbt
@@ -25,9 +21,17 @@ resources:
             # The default schema, catalog, etc. are defined in ../dbt_profiles/profiles.yml
             profiles_directory: dbt_profiles/
             commands:
+              {{- if (regexp "^yes").MatchString .personal_schemas}}
+              # The dbt commands to run (see also dbt_profiles/profiles.yml; dev_schema is used in the dev profile)
               - 'dbt deps --target=${bundle.target}'
-              - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: {{$dev_schema}} }"'
-              - 'dbt run --target=${bundle.target} --vars "{ dev_schema: {{$dev_schema}} }"'
+              - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"'
+              - 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"'
+              {{- else}}
+              # The dbt commands to run (see also the dev/prod profiles in dbt_profiles/profiles.yml)
+              - 'dbt deps --target=${bundle.target}'
+              - 'dbt seed --target=${bundle.target}'
+              - 'dbt run --target=${bundle.target}'
+              {{- end}}
             libraries:
               - pypi:
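
A sketch of the commands block as it renders for the shared-schema answer (the else branch above); this is just the template with the Go-template control lines dropped, no new behavior:

    commands:
      # The dbt commands to run (see also the dev/prod profiles in dbt_profiles/profiles.yml)
      - 'dbt deps --target=${bundle.target}'
      - 'dbt seed --target=${bundle.target}'
      - 'dbt run --target=${bundle.target}'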


@@ -1,5 +1,5 @@
 {
-    "welcome_message": "\nWelcome to the (EXPERIMENTAL) default SQL template for Databricks Asset Bundles!",
+    "welcome_message": "\nWelcome to the default SQL template for Databricks Asset Bundles!\n\nWorkspace selected based on your current profile (see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change this).\nworkspace_host: {{workspace_host}}",
     "properties": {
         "project_name": {
             "type": "string",


@@ -1,14 +1,22 @@
 -- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql_job.yml)
-{{- /* We can't use a materialized view here since they don't support 'create or refresh yet.*/}}
+{{- /* We can't use a materialized view here since they don't support 'create or refresh' yet.*/}}
+
+USE CATALOG {{"{{"}}catalog{{"}}"}};
+USE {{"{{"}}schema{{"}}"}};
+
 CREATE OR REPLACE VIEW
-  IDENTIFIER(CONCAT({{"{{"}}catalog{{"}}"}}, '.', {{"{{"}}schema{{"}}"}}, '.', 'orders_daily'))
+  orders_daily
 AS SELECT
   order_date, count(*) AS number_of_orders
 FROM
-  IDENTIFIER(CONCAT({{"{{"}}catalog{{"}}"}}, '.', {{"{{"}}schema{{"}}"}}, '.', 'orders_raw'))
-  -- During development, only process a smaller range of data
-WHERE {{"{{"}}bundle_target{{"}}"}} == "prod" OR (order_date >= '2019-08-01' AND order_date < '2019-09-01')
+  orders_raw
+WHERE if(
+  {{"{{"}}bundle_target{{"}}"}} != "prod",
+  true,
+  -- During development, only process a smaller range of data
+  order_date >= '2019-08-01' AND order_date < '2019-09-01'
+)
 GROUP BY order_date
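
The {{"{{"}}...{{"}}"}} sequences are Go-template escapes that emit literal braces, so the generated orders_daily.sql keeps {{catalog}}, {{schema}}, and {{bundle_target}} as parameters that the SQL file task fills in at run time. A sketch of the generated file, assuming a project named my_sql_project (the template comment is consumed during rendering and does not appear):

    -- This query is executed using Databricks Workflows (see resources/my_sql_project_sql_job.yml)

    USE CATALOG {{catalog}};
    USE {{schema}};

    CREATE OR REPLACE VIEW
      orders_daily
    AS SELECT
      order_date, count(*) AS number_of_orders
    FROM
      orders_raw
    WHERE if(
      {{bundle_target}} != "prod",
      true,
      -- During development, only process a smaller range of data
      order_date >= '2019-08-01' AND order_date < '2019-09-01'
    )
    GROUP BY order_date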


@@ -3,8 +3,11 @@
 -- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/
 -- See also https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-streaming-table.html
+USE CATALOG {{"{{"}}catalog{{"}}"}};
+USE {{"{{"}}schema{{"}}"}};
+
 CREATE OR REFRESH STREAMING TABLE
-  IDENTIFIER(CONCAT({{"{{"}}catalog{{"}}"}}, '.', {{"{{"}}schema{{"}}"}}, '.', 'orders_raw'))
+  orders_raw
 AS SELECT
   customer_name,
   DATE(TIMESTAMP(FROM_UNIXTIME(TRY_CAST(order_datetime AS BIGINT)))) AS order_date,