mirror of https://github.com/databricks/cli.git
Make dbt-sql and default-sql templates public (#1463)
## Changes

This makes the dbt-sql and default-sql templates public. These templates were previously unlisted and marked "experimental" because structured streaming tables were still in gated preview, which produced confusing error messages in workspaces not enabled for the preview. This PR also incorporates some of the feedback and lessons learned for these templates so far.
parent 70fd8ad3d7
commit aa36aee159
@@ -38,12 +38,10 @@ var nativeTemplates = []nativeTemplate{
 	{
 		name:        "default-sql",
 		description: "The default SQL template for .sql files that run with Databricks SQL",
-		hidden:      true,
 	},
 	{
 		name:        "dbt-sql",
-		description: "The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)",
-		hidden:      true,
+		description: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)",
 	},
 	{
 		name:        "mlops-stacks",
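The `hidden` field removed above is what kept these templates out of the template picker. A minimal sketch of the implied filtering, assuming a struct shaped like the one in the diff (the `visibleTemplates` helper and the sample entries are hypothetical, not the CLI's actual code):

```go
package main

import "fmt"

// nativeTemplate mirrors the fields visible in the diff above.
type nativeTemplate struct {
	name        string
	description string
	hidden      bool
}

// visibleTemplates is a hypothetical helper: once hidden is dropped from
// default-sql and dbt-sql, they pass a filter like this and get listed.
func visibleTemplates(all []nativeTemplate) []nativeTemplate {
	var out []nativeTemplate
	for _, t := range all {
		if !t.hidden {
			out = append(out, t)
		}
	}
	return out
}

func main() {
	// Illustrative entries, not the real template list.
	all := []nativeTemplate{
		{name: "default-sql", description: "The default SQL template for .sql files that run with Databricks SQL"},
		{name: "experimental-thing", description: "Still gated", hidden: true},
	}
	for _, t := range visibleTemplates(all) {
		fmt.Printf("- %s: %s\n", t.name, t.description)
	}
}
```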
@@ -30,6 +30,8 @@ func TestBundleInitRepoName(t *testing.T) {
 func TestNativeTemplateOptions(t *testing.T) {
 	expected := []cmdio.Tuple{
 		{Name: "default-python", Id: "The default Python template for Notebooks / Delta Live Tables / Workflows"},
+		{Name: "default-sql", Id: "The default SQL template for .sql files that run with Databricks SQL"},
+		{Name: "dbt-sql", Id: "The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)"},
 		{Name: "mlops-stacks", Id: "The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)"},
 		{Name: "custom...", Id: "Bring your own template"},
 	}
@@ -38,6 +40,8 @@ func TestNativeTemplateOptions(t *testing.T) {
 
 func TestNativeTemplateHelpDescriptions(t *testing.T) {
	expected := `- default-python: The default Python template for Notebooks / Delta Live Tables / Workflows
+- default-sql: The default SQL template for .sql files that run with Databricks SQL
+- dbt-sql: The dbt SQL template (databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)
 - mlops-stacks: The Databricks MLOps Stacks template (github.com/databricks/mlops-stacks)`
 	assert.Equal(t, expected, nativeTemplateHelpDescriptions())
 }
@@ -54,12 +54,6 @@ func Materialize(ctx context.Context, configFilePath, templateRoot, outputDir st
 		return err
 	}
 
-	// Print welcome message
-	welcome := config.schema.WelcomeMessage
-	if welcome != "" {
-		cmdio.LogString(ctx, welcome)
-	}
-
 	// Read and assign config values from file
 	if configFilePath != "" {
 		err = config.assignValuesFromFile(configFilePath)
@@ -73,6 +67,16 @@ func Materialize(ctx context.Context, configFilePath, templateRoot, outputDir st
 		return err
 	}
 
+	// Print welcome message
+	welcome := config.schema.WelcomeMessage
+	if welcome != "" {
+		welcome, err = r.executeTemplate(welcome)
+		if err != nil {
+			return err
+		}
+		cmdio.LogString(ctx, welcome)
+	}
+
 	// Prompt user for any missing config values. Assign default values if
 	// terminal is not TTY
 	err = config.promptOrAssignDefaultValues(r)
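The welcome message now prints after the renderer is constructed, and it is first passed through `r.executeTemplate`, so placeholders such as `{{workspace_host}}` in the schema's `welcome_message` get expanded. A self-contained sketch of that idea using Go's `text/template`; the real renderer wires up its own helper functions, so the map-backed lookup and the dotted `{{.workspace_host}}` form below are illustrative stand-ins:

```go
package main

import (
	"fmt"
	"strings"
	"text/template"
)

// renderWelcome is an illustrative stand-in for r.executeTemplate(welcome):
// parse the welcome string as a template, then execute it so placeholders
// are replaced before printing.
func renderWelcome(msg string, vars map[string]string) (string, error) {
	tmpl, err := template.New("welcome").Option("missingkey=error").Parse(msg)
	if err != nil {
		return "", err
	}
	var sb strings.Builder
	if err := tmpl.Execute(&sb, vars); err != nil {
		return "", err
	}
	return sb.String(), nil
}

func main() {
	// The real templates call a workspace_host helper function; the dotted
	// form here is just the simplest way to demo the same expansion.
	welcome := "Welcome to the dbt template!\nworkspace_host: {{.workspace_host}}"
	out, err := renderWelcome(welcome, map[string]string{
		"workspace_host": "https://example.cloud.databricks.com",
	})
	if err != nil {
		panic(err)
	}
	fmt.Println(out)
}
```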
@@ -1,5 +1,5 @@
 {
-    "welcome_message": "\nWelcome to the (EXPERIMENTAL) dbt template for Databricks Asset Bundles!",
+    "welcome_message": "\nWelcome to the dbt template for Databricks Asset Bundles!\n\nWorkspace selected based on your current profile (see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change this).\nworkspace_host: {{workspace_host}}",
     "properties": {
         "project_name": {
             "type": "string",
@@ -3,26 +3,35 @@
 {{- $catalog = "\"\" # workspace default"}}
 {{- end}}
 # This file defines dbt profiles for deployed dbt jobs.
-# Note that for local development you should create your own, local profile.
-# (see README.md).
 my_dbt_project:
   target: dev # default target
   outputs:
 
-    dev:
+    # Doing local development with the dbt CLI?
+    # Then you should create your own profile in your .dbt/profiles.yml using 'dbt init'
+    # (See README.md)
+
+    # The default target when deployed with the Databricks CLI
+    # N.B. when you use dbt from the command line, it uses the profile from .dbt/profiles.yml
+    dev:
       type: databricks
       method: http
       catalog: {{$catalog}}
+{{- if (regexp "^yes").MatchString .personal_schemas}}
      schema: "{{"{{"}} var('dev_schema') {{"}}"}}"
+{{- else}}
+      schema: "{{.shared_schema}}"
+{{- end}}
 
       http_path: {{.http_path}}
 
       # The workspace host / token are provided by Databricks
-      # see databricks.yml for the host used for 'dev'
+      # see databricks.yml for the workspace host used for 'dev'
       host: "{{"{{"}} env_var('DBT_HOST') {{"}}"}}"
       token: "{{"{{"}} env_var('DBT_ACCESS_TOKEN') {{"}}"}}"
 
-    prod:
+    # The production target when deployed with the Databricks CLI
+    prod:
       type: databricks
       method: http
       catalog: {{$catalog}}
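The new `{{- if (regexp "^yes").MatchString .personal_schemas}}` branch relies on Go templates being able to call methods on values returned from helper functions: `regexp` returns a compiled pattern and `MatchString` is invoked on the user's answer. A minimal sketch, assuming a `regexp` helper bound to `regexp.MustCompile` (the CLI registers its own helper set; this wiring is illustrative):

```go
package main

import (
	"os"
	"regexp"
	"text/template"
)

func main() {
	// Bind a "regexp" helper so templates can write (regexp "^yes").MatchString x.
	// regexp.MustCompile panics on a bad pattern, which is fine for fixed patterns.
	funcs := template.FuncMap{"regexp": regexp.MustCompile}

	// Mirrors the profiles.yml.tmpl conditional; {{"{{"}} emits a literal "{{"
	// so the rendered output still contains the dbt-side {{ var(...) }} call.
	const src = `{{- if (regexp "^yes").MatchString .personal_schemas -}}
schema: "{{"{{"}} var('dev_schema') {{"}}"}}"
{{- else -}}
schema: "shared"
{{- end}}
`
	t := template.Must(template.New("profiles").Funcs(funcs).Parse(src))

	// Answers like "yes, use a schema for each user" match ^yes,
	// selecting the per-user dev_schema branch.
	err := t.Execute(os.Stdout, map[string]string{
		"personal_schemas": "yes, use a schema for each user",
	})
	if err != nil {
		panic(err)
	}
}
```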
@@ -31,6 +40,6 @@ my_dbt_project:
       http_path: {{.http_path}}
 
       # The workspace host / token are provided by Databricks
-      # see databricks.yml for the host used for 'dev'
+      # see databricks.yml for the workspace host used for 'prod'
       host: "{{"{{"}} env_var('DBT_HOST') {{"}}"}}"
       token: "{{"{{"}} env_var('DBT_ACCESS_TOKEN') {{"}}"}}"
@@ -12,10 +12,6 @@ resources:
           on_failure:
             - {{user_name}}
 
-{{- $dev_schema := .shared_schema }}
-{{- if (regexp "^yes").MatchString .personal_schemas}}
-{{- $dev_schema = "${workspace.current_user.short_name}"}}
-{{- end}}
 
       tasks:
         - task_key: dbt
@@ -25,9 +21,17 @@ resources:
             # The default schema, catalog, etc. are defined in ../dbt_profiles/profiles.yml
             profiles_directory: dbt_profiles/
             commands:
+            {{- if (regexp "^yes").MatchString .personal_schemas}}
+              # The dbt commands to run (see also dbt_profiles/profiles.yml; dev_schema is used in the dev profile)
               - 'dbt deps --target=${bundle.target}'
-              - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: {{$dev_schema}} }"'
-              - 'dbt run --target=${bundle.target} --vars "{ dev_schema: {{$dev_schema}} }"'
+              - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"'
+              - 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"'
+            {{- else}}
+              # The dbt commands to run (see also the dev/prod profiles in dbt_profiles/profiles.yml)
+              - 'dbt deps --target=${bundle.target}'
+              - 'dbt seed --target=${bundle.target}'
+              - 'dbt run --target=${bundle.target}'
+            {{- end}}
 
             libraries:
               - pypi:
@@ -1,5 +1,5 @@
 {
-    "welcome_message": "\nWelcome to the (EXPERIMENTAL) default SQL template for Databricks Asset Bundles!",
+    "welcome_message": "\nWelcome to the default SQL template for Databricks Asset Bundles!\n\nWorkspace selected based on your current profile (see https://docs.databricks.com/dev-tools/cli/profiles.html for how to change this).\nworkspace_host: {{workspace_host}}",
     "properties": {
         "project_name": {
             "type": "string",
@@ -1,14 +1,22 @@
 -- This query is executed using Databricks Workflows (see resources/{{.project_name}}_sql_job.yml)
-{{- /* We can't use a materialized view here since they don't support 'create or refresh yet.*/}}
+{{- /* We can't use a materialized view here since they don't support 'create or refresh' yet.*/}}
 
+USE CATALOG {{"{{"}}catalog{{"}}"}};
+USE {{"{{"}}schema{{"}}"}};
+
 CREATE OR REPLACE VIEW
-  IDENTIFIER(CONCAT({{"{{"}}catalog{{"}}"}}, '.', {{"{{"}}schema{{"}}"}}, '.', 'orders_daily'))
+  orders_daily
 AS SELECT
   order_date, count(*) AS number_of_orders
 FROM
-  IDENTIFIER(CONCAT({{"{{"}}catalog{{"}}"}}, '.', {{"{{"}}schema{{"}}"}}, '.', 'orders_raw'))
-
--- During development, only process a smaller range of data
-WHERE {{"{{"}}bundle_target{{"}}"}} == "prod" OR (order_date >= '2019-08-01' AND order_date < '2019-09-01')
+  orders_raw
+
+WHERE if(
+  {{"{{"}}bundle_target{{"}}"}} != "prod",
+  true,
+
+  -- During development, only process a smaller range of data
+  order_date >= '2019-08-01' AND order_date < '2019-09-01'
+)
 
 GROUP BY order_date
@@ -3,8 +3,11 @@
 -- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/
 -- See also https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-create-streaming-table.html
 
+USE CATALOG {{"{{"}}catalog{{"}}"}};
+USE {{"{{"}}schema{{"}}"}};
+
 CREATE OR REFRESH STREAMING TABLE
-  IDENTIFIER(CONCAT({{"{{"}}catalog{{"}}"}}, '.', {{"{{"}}schema{{"}}"}}, '.', 'orders_raw'))
+  orders_raw
 AS SELECT
   customer_name,
   DATE(TIMESTAMP(FROM_UNIXTIME(TRY_CAST(order_datetime AS BIGINT)))) AS order_date,