mirror of https://github.com/databricks/cli.git
Add an experimental dbt-sql template (#1059)
## Changes

This adds a new dbt-sql template. This work requires the new WorkspaceFS support for dbt tasks.

In this latest revision, I've hidden the new template from the list so we can merge it, iterate over it, and properly release the template at the right time.

Blockers:
- [x] WorkspaceFS support for dbt projects is in prod
- [x] Move dbt files into a subdirectory
- [ ] Wait until the next (>1.7.4) release of the dbt plugin which will have major improvements! - _Rather than wait, this template is hidden from the list of templates._
- [x] SQL extension is preconfigured based on extension settings (if possible)
- MV / streaming tables:
  - [x] Add to template
  - [x] Fix https://github.com/databricks/dbt-databricks/issues/535 (to be released with 1.7.4)
  - [x] Merge https://github.com/databricks/dbt-databricks/pull/338 (to be released with 1.7.4)
  - [ ] Fix "too many 503 errors" issue (https://github.com/databricks/dbt-databricks/issues/570, internal tracker: ES-1009215, ES-1014138)
- [x] Support ANSI mode in the template
- [ ] Streaming tables support is either ungated or the template provides instructions about signup - _Mitigation for now: this template is hidden from the list of templates._
- [x] Support non-workspace-admin deployment
- [x] Make sure `data_security_mode: SINGLE_USER` works on non-UC workspaces (it's required to be explicitly specified on UC workspaces with single-node clusters)
- [x] Support non-UC workspaces

## Tests
- [x] Unit tests
- [x] Manual testing
- [x] More manual testing
- [ ] Reviewer manual testing - _I'd like to do a small bug bash post-merging._
parent f70ec359dc
commit 1c680121c8
@@ -25,6 +25,7 @@ type nativeTemplate struct {
    gitUrl      string
    description string
    aliases     []string
    hidden      bool
}

const customTemplate = "custom..."
@@ -34,6 +35,11 @@ var nativeTemplates = []nativeTemplate{
        name:        "default-python",
        description: "The default Python template for Notebooks / Delta Live Tables / Workflows",
    },
    {
        name:        "dbt-sql",
        description: "The dbt SQL template (https://www.databricks.com/blog/delivering-cost-effective-data-real-time-dbt-and-databricks)",
        hidden:      true,
    },
    {
        name:   "mlops-stacks",
        gitUrl: "https://github.com/databricks/mlops-stacks",
@@ -50,7 +56,7 @@ var nativeTemplates = []nativeTemplate{
func nativeTemplateHelpDescriptions() string {
    var lines []string
    for _, template := range nativeTemplates {
        if template.name != customTemplate {
        if template.name != customTemplate && !template.hidden {
            lines = append(lines, fmt.Sprintf("- %s: %s", template.name, template.description))
        }
    }
@@ -61,6 +67,9 @@ func nativeTemplateHelpDescriptions() string {
func nativeTemplateOptions() []cmdio.Tuple {
    names := make([]cmdio.Tuple, 0, len(nativeTemplates))
    for _, template := range nativeTemplates {
        if template.hidden {
            continue
        }
        tuple := cmdio.Tuple{
            Name: template.name,
            Id:   template.description,
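As a standalone illustration of the hunks above, here is a minimal, self-contained Go sketch (not part of this commit): a template marked `hidden` is filtered out of the help text and interactive options, but can still be found when addressed by name. The trimmed-down struct and the `lookupTemplate` helper are illustrative stand-ins, not the CLI's actual code.

```go
package main

import "fmt"

// Mirrors the nativeTemplate struct from the hunk above (illustrative sketch).
type nativeTemplate struct {
    name        string
    description string
    hidden      bool
}

var templates = []nativeTemplate{
    {name: "default-python", description: "The default Python template"},
    {name: "dbt-sql", description: "The dbt SQL template", hidden: true},
}

// visibleTemplates returns only templates that should appear in help output,
// mirroring the !template.hidden check added in this commit.
func visibleTemplates() []nativeTemplate {
    var out []nativeTemplate
    for _, t := range templates {
        if !t.hidden {
            out = append(out, t)
        }
    }
    return out
}

// lookupTemplate (hypothetical helper) still resolves hidden templates
// when a user names them explicitly.
func lookupTemplate(name string) (nativeTemplate, bool) {
    for _, t := range templates {
        if t.name == name {
            return t, true
        }
    }
    return nativeTemplate{}, false
}

func main() {
    for _, t := range visibleTemplates() {
        fmt.Printf("- %s: %s\n", t.name, t.description) // dbt-sql is not listed
    }
    if t, ok := lookupTemplate("dbt-sql"); ok {
        fmt.Println("hidden template still resolvable:", t.name)
    }
}
```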
@@ -11,6 +11,7 @@ import (

    "github.com/databricks/cli/cmd/root"
    "github.com/databricks/cli/libs/auth"
    "github.com/databricks/databricks-sdk-go/apierr"
    "github.com/databricks/databricks-sdk-go/service/iam"
)

@@ -29,6 +30,7 @@ type pair struct {

var cachedUser *iam.User
var cachedIsServicePrincipal *bool
var cachedCatalog *string

func loadHelpers(ctx context.Context) template.FuncMap {
    w := root.WorkspaceClient(ctx)
@@ -108,6 +110,25 @@ func loadHelpers(ctx context.Context) template.FuncMap {
            }
            return auth.GetShortUserName(cachedUser.UserName), nil
        },
        // Get the default workspace catalog. If there is no default, or if
        // Unity Catalog is not enabled, return an empty string.
        "default_catalog": func() (string, error) {
            if cachedCatalog == nil {
                metastore, err := w.Metastores.Current(ctx)
                if err != nil {
                    var aerr *apierr.APIError
                    if errors.As(err, &aerr) && aerr.ErrorCode == "METASTORE_DOES_NOT_EXIST" {
                        // Workspace doesn't have a metastore assigned, ignore error
                        empty_default := ""
                        cachedCatalog = &empty_default
                        return "", nil
                    }
                    return "", err
                }
                cachedCatalog = &metastore.DefaultCatalogName
            }
            return *cachedCatalog, nil
        },
        "is_service_principal": func() (bool, error) {
            if cachedIsServicePrincipal != nil {
                return *cachedIsServicePrincipal, nil
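The `default_catalog` helper above calls the workspace API at most once and maps a missing metastore to an empty catalog name. Below is a hedged, standalone sketch of that caching pattern; `fetchDefaultCatalog` and `errNoMetastore` are stand-ins for `w.Metastores.Current(ctx)` and the `METASTORE_DOES_NOT_EXIST` API error, not real SDK calls.

```go
package main

import (
    "errors"
    "fmt"
)

// errNoMetastore stands in for the METASTORE_DOES_NOT_EXIST API error
// handled in the hunk above (illustrative, not the SDK type).
var errNoMetastore = errors.New("METASTORE_DOES_NOT_EXIST")

var cachedCatalog *string

// fetchDefaultCatalog is a stand-in for querying the current metastore.
func fetchDefaultCatalog() (string, error) {
    return "", errNoMetastore // pretend this workspace has no metastore
}

// defaultCatalog mirrors the caching and error handling of the template helper:
// the API is called at most once, and a missing metastore maps to "".
func defaultCatalog() (string, error) {
    if cachedCatalog == nil {
        name, err := fetchDefaultCatalog()
        if err != nil {
            if errors.Is(err, errNoMetastore) {
                empty := ""
                cachedCatalog = &empty
                return "", nil
            }
            return "", err
        }
        cachedCatalog = &name
    }
    return *cachedCatalog, nil
}

func main() {
    c, err := defaultCatalog()
    fmt.Printf("catalog=%q err=%v\n", c, err) // catalog="" err=<nil>
}
```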
@@ -37,10 +37,10 @@ func assertFilePermissions(t *testing.T, path string, perm fs.FileMode) {
    assert.Equal(t, perm, info.Mode().Perm())
}

func assertBuiltinTemplateValid(t *testing.T, settings map[string]any, target string, isServicePrincipal bool, build bool, tempDir string) {
func assertBuiltinTemplateValid(t *testing.T, template string, settings map[string]any, target string, isServicePrincipal bool, build bool, tempDir string) {
    ctx := context.Background()

    templatePath, err := prepareBuiltinTemplates("default-python", tempDir)
    templatePath, err := prepareBuiltinTemplates(template, tempDir)
    require.NoError(t, err)
    libraryPath := filepath.Join(templatePath, "library")

@@ -50,6 +50,9 @@ func assertBuiltinTemplateValid(t *testing.T, settings map[string]any, target st

    // Prepare helpers
    cachedUser = &iam.User{UserName: "user@domain.com"}
    if isServicePrincipal {
        cachedUser.UserName = "1d410060-a513-496f-a197-23cc82e5f46d"
    }
    cachedIsServicePrincipal = &isServicePrincipal
    ctx = root.SetWorkspaceClient(ctx, w)
    helpers := loadHelpers(ctx)

@@ -102,11 +105,13 @@ func TestPrepareBuiltInTemplatesWithRelativePaths(t *testing.T) {
    assert.Equal(t, "./default-python", dir)
}

func TestBuiltinTemplateValid(t *testing.T) {
func TestBuiltinPythonTemplateValid(t *testing.T) {
    // Test option combinations
    options := []string{"yes", "no"}
    isServicePrincipal := false
    build := false
    catalog := "hive_metastore"
    cachedCatalog = &catalog
    for _, includeNotebook := range options {
        for _, includeDlt := range options {
            for _, includePython := range options {
@@ -118,7 +123,7 @@ func TestBuiltinTemplateValid(t *testing.T) {
                    "include_python": includePython,
                }
                tempDir := t.TempDir()
                assertBuiltinTemplateValid(t, config, "dev", isServicePrincipal, build, tempDir)
                assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir)
            }
        }
    }
@@ -140,10 +145,28 @@ func TestBuiltinTemplateValid(t *testing.T) {
    require.NoError(t, err)
    defer os.RemoveAll(tempDir)

    assertBuiltinTemplateValid(t, config, "prod", isServicePrincipal, build, tempDir)
    assertBuiltinTemplateValid(t, "default-python", config, "prod", isServicePrincipal, build, tempDir)
    defer os.RemoveAll(tempDir)
}

func TestBuiltinDbtTemplateValid(t *testing.T) {
    for _, personal_schemas := range []string{"yes", "no"} {
        for _, target := range []string{"dev", "prod"} {
            for _, isServicePrincipal := range []bool{true, false} {
                config := map[string]any{
                    "project_name":     "my_project",
                    "http_path":        "/sql/1.0/warehouses/123",
                    "default_catalog":  "hive_metastore",
                    "personal_schemas": personal_schemas,
                    "shared_schema":    "lennart",
                }
                build := false
                assertBuiltinTemplateValid(t, "dbt-sql", config, target, isServicePrincipal, build, t.TempDir())
            }
        }
    }
}

func TestRendererWithAssociatedTemplateInLibrary(t *testing.T) {
    tmpDir := t.TempDir()

@@ -0,0 +1,9 @@
# dbt template

This folder provides a template for using dbt-core with Databricks Asset Bundles.
It leverages dbt-core for local development and relies on Databricks Asset Bundles
for deployment (either manually or with CI/CD). In production,
dbt is executed using Databricks Workflows.

* Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects.
* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html
@@ -0,0 +1,53 @@
{
  "welcome_message": "\nWelcome to the (EXPERIMENTAL) dbt template for Databricks Asset Bundles!",
  "properties": {
    "project_name": {
      "type": "string",
      "pattern": "^[A-Za-z_][A-Za-z0-9_]+$",
      "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores.",
      "default": "dbt_project",
      "description": "\nPlease provide a unique name for this project.\nproject_name",
      "order": 1
    },
    "http_path": {
      "type": "string",
      "pattern": "^/sql/.\\../warehouses/[a-z0-9]+$",
      "pattern_match_failure_message": "Path must be of the form /sql/1.0/warehouses/<warehouse id>",
      "description": " \nPlease provide the HTTP Path of the SQL warehouse you would like to use with dbt during development.\nYou can find this path by clicking on \"Connection details\" for your SQL warehouse.\nhttp_path [example: /sql/1.0/warehouses/abcdef1234567890]",
      "order": 2
    },
    "default_catalog": {
      "type": "string",
      "default": "{{default_catalog}}",
      "pattern": "^\\w*$",
      "pattern_match_failure_message": "Invalid catalog name.",
      "description": "\nPlease provide an initial catalog{{if eq (default_catalog) \"\"}} (leave blank when not using Unity Catalog){{end}}.\ndefault_catalog",
      "order": 3
    },
    "personal_schemas": {
      "type": "string",
      "description": "\nWould you like to use a personal schema for each user working on this project? (e.g., 'catalog.{{short_name}}')\npersonal_schemas",
      "enum": [
        "yes, use a schema based on the current user name during development",
        "no, use a shared schema during development"
      ],
      "order": 4
    },
    "shared_schema": {
      "skip_prompt_if": {
        "properties": {
          "personal_schemas": {
            "const": "yes, use a schema based on the current user name during development"
          }
        }
      },
      "type": "string",
      "default": "default",
      "pattern": "^\\w+$",
      "pattern_match_failure_message": "Invalid schema name.",
      "description": "\nPlease provide an initial schema during development.\ndefault_schema",
      "order": 5
    }
  },
  "success_message": "\n📊 Your new project has been created in the '{{.project_name}}' directory!\nIf you already have dbt installed, just type 'cd {{.project_name}}; dbt init' to get started.\nRefer to the README.md file for full \"getting started\" guide and production setup instructions.\n"
}
@@ -0,0 +1,7 @@
{{define "latest_lts_dbr_version" -}}
    13.3.x-scala2.12
{{- end}}

{{define "latest_lts_db_connect_version_spec" -}}
    >=13.3,<13.4
{{- end}}
@@ -0,0 +1,9 @@
# Preamble

This file contains only template directives; it is skipped for the actual output.

{{skip "__preamble"}}

{{if eq .project_name "dbt"}}
{{fail "Project name 'dbt' is not supported"}}
{{end}}
libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/__builtins__.pyi (vendored, new file, 3 lines)

@@ -0,0 +1,3 @@
# Typings for Pylance in Visual Studio Code
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
from databricks.sdk.runtime import *
libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/extensions.json (vendored, new file, 6 lines)

@@ -0,0 +1,6 @@
{
  "recommendations": [
    "redhat.vscode-yaml",
    "innoverio.vscode-dbt-power-user",
  ]
}
libs/template/templates/dbt-sql/template/{{.project_name}}/.vscode/settings.json.tmpl (vendored, new file, 33 lines)

@@ -0,0 +1,33 @@
{
  "python.analysis.stubPath": ".vscode",
  "databricks.python.envFile": "${workspaceFolder}/.env",
  "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
  "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
  "python.testing.pytestArgs": [
    "."
  ],
  "python.testing.unittestEnabled": false,
  "python.testing.pytestEnabled": true,
  "python.analysis.extraPaths": ["src"],
  "files.exclude": {
    "**/*.egg-info": true,
    "**/__pycache__": true,
    ".pytest_cache": true,
  },
  "python.envFile": "${workspaceFolder}/.databricks/.databricks.env",
  "python.defaultInterpreterPath": "${workspaceFolder}/.venv/bin/python",
  "sqltools.connections": [
    {
      "connectionMethod": "VS Code Extension (beta)",
      "catalog": "hive_metastore",
      "previewLimit": 50,
      "driver": "Databricks",
      "name": "databricks",
      "path": "{{.http_path}}"
    }
  ],
  "sqltools.autoConnectTo": "",
  "[jinja-sql]": {
    "editor.defaultFormatter": "innoverio.vscode-dbt-power-user"
  }
}
@@ -0,0 +1,138 @@
# {{.project_name}}

The '{{.project_name}}' project was generated by using the dbt template for
Databricks Asset Bundles. It follows the standard dbt project structure
and has an additional `resources` directory to define Databricks resources such as jobs
that run dbt models.

* Learn more about dbt and its standard project structure here: https://docs.getdbt.com/docs/build/projects.
* Learn more about Databricks Asset Bundles here: https://docs.databricks.com/en/dev-tools/bundles/index.html

The remainder of this file includes instructions for local development (using dbt)
and deployment to production (using Databricks Asset Bundles).

## Development setup

1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html

2. Authenticate to your Databricks workspace, if you have not done so already:

   ```
   $ databricks configure
   ```

3. Install dbt

   To install dbt, you need a recent version of Python. For the instructions below,
   we assume `python3` refers to the Python version you want to use. On some systems,
   you may need to refer to a different Python version, e.g. `python` or `/usr/bin/python`.

   Run these instructions from the `{{.project_name}}` directory. We recommend making
   use of a Python virtual environment and installing dbt as follows:

   ```
   $ python3 -m venv .venv
   $ . .venv/bin/activate
   $ pip install -r requirements-dev.txt
   ```

4. Initialize your dbt profile

   Use `dbt init` to initialize your profile.

   ```
   $ dbt init
   ```

   Note that dbt authentication uses personal access tokens by default
   (see https://docs.databricks.com/dev-tools/auth/pat.html).
   You can use OAuth as an alternative, but this currently requires manual configuration.
   See https://github.com/databricks/dbt-databricks/blob/main/docs/oauth.md
   for general instructions, or https://community.databricks.com/t5/technical-blog/using-dbt-core-with-oauth-on-azure-databricks/ba-p/46605
   for advice on setting up OAuth for Azure Databricks.

   To set up additional profiles, such as a 'prod' profile,
   see https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles.

5. Activate dbt so it can be used from the terminal

   ```
   $ . .venv/bin/activate
   ```

## Local development with dbt

Use `dbt` to [run this project locally using a SQL warehouse](https://docs.databricks.com/partners/prep/dbt.html):

```
$ dbt seed
$ dbt run
```

(Did you get an error that the dbt command could not be found? You may need
to try the last step from the development setup above to re-activate
your Python virtual environment!)

To evaluate just a single model defined in a file called orders.sql, use:

```
$ dbt run --model orders
```

Use `dbt test` to run tests generated from yml files such as `models/schema.yml`
and any SQL tests from `tests/`.

```
$ dbt test
```

## Production setup

Your production dbt profiles are defined in dbt_profiles/profiles.yml.
These profiles define the default catalog, schema, and any other
target-specific settings. Read more about dbt profiles on Databricks at
https://docs.databricks.com/en/workflows/jobs/how-to/use-dbt-in-workflows.html#advanced-run-dbt-with-a-custom-profile.

The target workspaces for staging and prod are defined in databricks.yml.
You can manually deploy based on these configurations (see below).
Or you can use CI/CD to automate deployment. See
https://docs.databricks.com/dev-tools/bundles/ci-cd.html for documentation
on CI/CD setup.

## Manually deploying to Databricks with Databricks Asset Bundles

Databricks Asset Bundles can be used to deploy to Databricks and to execute
dbt commands as a job using Databricks Workflows. See
https://docs.databricks.com/dev-tools/bundles/index.html to learn more.

Use the Databricks CLI to deploy a development copy of this project to a workspace:

```
$ databricks bundle deploy --target dev
```

(Note that "dev" is the default target, so the `--target` parameter
is optional here.)

This deploys everything that's defined for this project.
For example, the default template would deploy a job called
`[dev yourname] {{.project_name}}_job` to your workspace.
You can find that job by opening your workspace and clicking on **Workflows**.

You can also deploy to your production target directly from the command-line.
The warehouse, catalog, and schema for that target are configured in databricks.yml.
When deploying to this target, note that the default job at resources/{{.project_name}}_job.yml
has a schedule set that runs every day. The schedule is paused when deploying in development mode
(see https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).

To deploy a production copy, type:

```
$ databricks bundle deploy --target prod
```

## IDE support

Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
https://docs.databricks.com/dev-tools/vscode-ext.html. Third-party extensions
related to dbt may further enhance your dbt development experience!
@@ -0,0 +1,32 @@
# This file defines the structure of this project and how it is deployed
# to production using Databricks Asset Bundles.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: {{.project_name}}

include:
  - resources/*.yml

# Deployment targets.
# The default schema, catalog, etc. for dbt are defined in dbt_profiles/profiles.yml
targets:
  dev:
    default: true
    # We use 'mode: development' to indicate this is a personal development copy.
    # Any job schedules and triggers are paused by default.
    mode: development
    workspace:
      host: {{workspace_host}}

  prod:
    mode: production
    workspace:
      host: {{workspace_host}}
      # We always use /Users/{{user_name}} for all resources to make sure we only have a single copy.
      root_path: /Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
    {{- if not is_service_principal}}
    run_as:
      # This runs as {{user_name}} in production. We could also use a service principal here
      # using service_principal_name (see the Databricks documentation).
      user_name: {{user_name}}
    {{- end}}
@@ -0,0 +1,36 @@
{{- $catalog := .default_catalog}}
{{- if eq .default_catalog ""}}
{{- $catalog = "\"\" # workspace default"}}
{{- end}}
# This file defines dbt profiles for deployed dbt jobs.
# Note that for local development you should create your own, local profile.
# (see README.md).
my_dbt_project:
  target: dev # default target
  outputs:

    dev:
      type: databricks
      method: http
      catalog: {{$catalog}}
      schema: "{{"{{"}} var('dev_schema') {{"}}"}}"

      http_path: {{.http_path}}

      # The workspace host / token are provided by Databricks
      # see databricks.yml for the host used for 'dev'
      host: "{{"{{"}} env_var('DBT_HOST') {{"}}"}}"
      token: "{{"{{"}} env_var('DBT_ACCESS_TOKEN') {{"}}"}}"

    prod:
      type: databricks
      method: http
      catalog: {{$catalog}}
      schema: {{.shared_schema}}

      http_path: {{.http_path}}

      # The workspace host / token are provided by Databricks
      # see databricks.yml for the host used for 'dev'
      host: "{{"{{"}} env_var('DBT_HOST') {{"}}"}}"
      token: "{{"{{"}} env_var('DBT_ACCESS_TOKEN') {{"}}"}}"
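A note on the `{{"{{"}}` / `{{"}}"}}` sequences in the profile above: they make the CLI's Go text/template engine emit literal `{{` and `}}`, so the rendered profiles.yml still contains dbt/Jinja placeholders such as `{{ var('dev_schema') }}`. A minimal, standalone Go sketch of that escaping (illustrative only, not the CLI's rendering code):

```go
package main

import (
    "os"
    "text/template"
)

func main() {
    // {{"{{"}} and {{"}}"}} are template actions that output the literal strings "{{" and "}}".
    const line = `schema: "{{"{{"}} var('dev_schema') {{"}}"}}"` + "\n"
    t := template.Must(template.New("profiles").Parse(line))
    // Prints: schema: "{{ var('dev_schema') }}"
    if err := t.Execute(os.Stdout, nil); err != nil {
        panic(err)
    }
}
```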
@@ -0,0 +1,32 @@
name: '{{.project_name}}'
version: '1.0.0'
config-version: 2

# This setting configures which "profile" dbt uses for this project.
profile: '{{.project_name}}'

# These configurations specify where dbt should look for different types of files.
# For Databricks asset bundles, we put everything in src, as you may have
# non-dbt resources in your project.
model-paths: ["src/models"]
analysis-paths: ["src/analyses"]
test-paths: ["src/tests"]
seed-paths: ["src/seeds"]
macro-paths: ["src/macros"]
snapshot-paths: ["src/snapshots"]

clean-targets: # directories to be removed by `dbt clean`
  - "target"
  - "dbt_packages"

# Configuring models
# Full documentation: https://docs.getdbt.com/docs/configuring-models

# In this example config, we tell dbt to build all models in the example/
# directory as views by default. These settings can be overridden in the
# individual model files using the `{{"{{"}} config(...) {{"}}"}}` macro.
models:
  {{.project_name}}:
    # Config indicated by + and applies to all files under models/example/
    example:
      +materialized: view
@@ -0,0 +1,37 @@
# This file defines prompts with defaults for dbt initialization.
# It is used when the `dbt init` command is invoked.
#
fixed:
  type: databricks
prompts:
  host:
    default: {{(regexp "^https?://").ReplaceAllString workspace_host ""}}
  token:
    hint: 'personal access token to use, dapiXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
    hide_input: true
  http_path:
    hint: 'HTTP path of SQL warehouse to use'
    default: {{.http_path}}
  {{- if eq .default_catalog ""}}
  _choose_unity_catalog:
    'use the default workspace catalog (or do not use Unity Catalog)':
      _fixed_catalog: null
    'specify a default catalog':
      catalog:
        hint: 'initial catalog'
  {{- else}}
  catalog:
    hint: 'initial catalog'
    default: {{.default_catalog}}
  {{- end}}
  schema:
    {{- if (regexp "^yes").MatchString .personal_schemas}}
    hint: 'personal schema where dbt will build objects during development, example: {{short_name}}'
    {{- else}}
    hint: 'default schema where dbt will build objects'
    default: {{.shared_schema}}
    {{- end}}
  threads:
    hint: 'threads to use during development, 1 or more'
    type: 'int'
    default: 4
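The `host` default above uses `(regexp "^https?://").ReplaceAllString workspace_host ""` to strip the URL scheme before handing the host to dbt. A small standalone Go sketch of the same normalization; the workspace URL shown is just an example value:

```go
package main

import (
    "fmt"
    "regexp"
)

func main() {
    workspaceHost := "https://adb-1234567890123456.7.azuredatabricks.net" // example value
    // Strip the http:// or https:// prefix, mirroring the template expression above.
    host := regexp.MustCompile("^https?://").ReplaceAllString(workspaceHost, "")
    fmt.Println(host) // adb-1234567890123456.7.azuredatabricks.net
}
```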
@@ -0,0 +1,3 @@
## requirements-dev.txt: dependencies for local development.

dbt-databricks>=1.0.0,<2.0.0
@@ -0,0 +1,45 @@
resources:
  jobs:
    {{.project_name}}_job:
      name: {{.project_name}}_job

      schedule:
        # Run every day at 9:27 AM
        quartz_cron_expression: 21 27 9 * * ?
        timezone_id: UTC

      email_notifications:
        on_failure:
          - {{user_name}}

      {{- $dev_schema := .shared_schema }}
      {{- if (regexp "^yes").MatchString .personal_schemas}}
      {{- $dev_schema = "${workspace.current_user.short_name}"}}
      {{- end}}

      tasks:
        - task_key: dbt

          dbt_task:
            project_directory: ../
            # The default schema, catalog, etc. are defined in ../dbt_profiles/profiles.yml
            profiles_directory: dbt_profiles/
            commands:
              - 'dbt deps --target=${bundle.target}'
              - 'dbt seed --target=${bundle.target} --vars "{ dev_schema: {{$dev_schema}} }"'
              - 'dbt run --target=${bundle.target} --vars "{ dev_schema: {{$dev_schema}} }"'

          libraries:
            - pypi:
                package: dbt-databricks>=1.0.0,<2.0.0

          new_cluster:
            spark_version: {{template "latest_lts_dbr_version"}}
            node_type_id: {{smallest_node_type}}
            data_security_mode: SINGLE_USER
            num_workers: 0
            spark_conf:
              spark.master: "local[*, 4]"
              spark.databricks.cluster.profile: singleNode
            custom_tags:
              ResourceClass: SingleNode
@@ -0,0 +1,24 @@
{{- if eq (default_catalog) ""}}
{{- /* This workspace might not have Unity Catalog, */}}
{{- /* so let's not show both materialized views and streaming tables. */}}
{{- /* They're not supported without Unity Catalog! */}}
-- This model file defines a table called 'orders_daily'
{{"{{"}} config(materialized = 'table') {{"}}"}}
{{- else}}
-- This model file defines a materialized view called 'orders_daily'
--
-- Read more about materialized views at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables
-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561.
{{"{{"}} config(materialized = 'materialized_view') {{"}}"}}
{{- end}}

select order_date, count(*) AS number_of_orders

from {{"{{"}} ref('orders_raw') {{"}}"}}

-- During development, only process a smaller range of data
{% if target.name != 'prod' %}
where order_date >= '2019-08-01' and order_date < '2019-09-01'
{% endif %}

group by order_date
@@ -0,0 +1,16 @@
-- This model file defines a streaming table called 'orders_raw'
--
-- The streaming table below ingests all JSON files in /databricks-datasets/retail-org/sales_orders/
-- Read more about streaming tables at https://docs.getdbt.com/reference/resource-configs/databricks-configs#materialized-views-and-streaming-tables
-- Current limitation: a "full refresh" is needed in case the definition below is changed; see https://github.com/databricks/dbt-databricks/issues/561.
{{"{{"}} config(materialized = 'streaming_table') {{"}}"}}

select
  customer_name,
  date(timestamp(from_unixtime(try_cast(order_datetime as bigint)))) as order_date,
  order_number
from stream read_files(
  "/databricks-datasets/retail-org/sales_orders/",
  format => "json",
  header => true
)
@@ -0,0 +1,21 @@

version: 2

models:
  - name: orders_raw
    description: "Raw ingested orders"
    columns:
      - name: customer_name
        description: "The name of a customer"
        tests:
          - unique
          - not_null

  - name: orders_daily
    description: "Number of orders by day"
    columns:
      - name: order_date
        description: "The date on which orders took place"
        tests:
          - unique
          - not_null
@@ -6,7 +6,7 @@ The '{{.project_name}}' project was generated by using the default-python templa

1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html

2. Authenticate to your Databricks workspace:
2. Authenticate to your Databricks workspace, if you have not done so already:
   ```
   $ databricks configure
   ```

@@ -28,6 +28,11 @@ The '{{.project_name}}' project was generated by using the default-python templa
   $ databricks bundle deploy --target prod
   ```

   Note that the default job from the template has a schedule that runs every day
   (defined in resources/{{.project_name}}_job.yml). The schedule
   is paused when deploying in development mode (see
   https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).

5. To run a job or pipeline, use the "run" command:
   ```
   $ databricks bundle run