Support serverless mode in default-python template (explicit prompt) (#2377)

## Changes

- Add a 'serverless' prompt to the default-python template (the default is currently set to "no"); see the sketch after this list for the effect on the generated job.
- This is a simplified version of https://github.com/databricks/cli/pull/2348 with the 'auto' functionality removed.
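
For context, the prompt switches the generated job between serverless execution environments and a classic job cluster. Below is a condensed sketch of the two shapes, with values taken from the template defaults shown in the diffs; it is illustrative, not verbatim template output:

```yaml
# With serverless = "yes", tasks reference a shared execution environment
tasks:
  - task_key: main_task
    environment_key: default
    python_wheel_task:
      package_name: my_default_python
      entry_point: main
environments:
  - environment_key: default
    spec:
      client: "1"
      dependencies:
        - ../dist/*.whl
---
# With serverless = "no", tasks reference a job cluster and attach the wheel directly
tasks:
  - task_key: main_task
    job_cluster_key: job_cluster
    python_wheel_task:
      package_name: my_default_python
      entry_point: main
    libraries:
      - whl: ../dist/*.whl
job_clusters:
  - job_cluster_key: job_cluster
    new_cluster:
      spark_version: 15.4.x-scala2.12
      node_type_id: i3.xlarge
```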

## Tests
- Split the default-python template tests into default-python/classic, default-python/serverless, and default-python/serverless-customcatalog.
- Manually checked that "bundle init default-python" with serverless=yes can be deployed and run on dogfood and the test environment.
Denis Bilenko 2025-02-26 14:07:30 +01:00 committed by GitHub
parent df001dcdfe
commit 03f2ff5a39
34 changed files with 211 additions and 32 deletions

View File

@@ -2,5 +2,6 @@
"project_name": "my_default_python",
"include_notebook": "yes",
"include_dlt": "yes",
"include_python": "yes"
"include_python": "yes",
"serverless": "no"
}

View File

@@ -0,0 +1,54 @@
--- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.job.yml
+++ output/my_default_python/resources/my_default_python.job.yml
@@ -17,4 +17,5 @@
tasks:
- task_key: notebook_task
+ job_cluster_key: job_cluster
notebook_task:
notebook_path: ../src/notebook.ipynb
@@ -29,17 +30,21 @@
depends_on:
- task_key: refresh_pipeline
- environment_key: default
+ job_cluster_key: job_cluster
python_wheel_task:
package_name: my_default_python
entry_point: main
+ libraries:
+ # By default we just include the .whl file generated for the my_default_python package.
+ # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+ # for more information on how to add other libraries.
+ - whl: ../dist/*.whl
- # A list of task execution environment specifications that can be referenced by tasks of this job.
- environments:
- - environment_key: default
-
- # Full documentation of this spec can be found at:
- # https://docs.databricks.com/api/workspace/jobs/create#environments-spec
- spec:
- client: "1"
- dependencies:
- - ../dist/*.whl
+ job_clusters:
+ - job_cluster_key: job_cluster
+ new_cluster:
+ spark_version: 15.4.x-scala2.12
+ node_type_id: i3.xlarge
+ data_security_mode: SINGLE_USER
+ autoscale:
+ min_workers: 1
+ max_workers: 4
--- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml
+++ output/my_default_python/resources/my_default_python.pipeline.yml
@@ -4,8 +4,7 @@
my_default_python_pipeline:
name: my_default_python_pipeline
- ## Catalog is required for serverless compute
- catalog: main
+ ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
+ # catalog: catalog_name
target: my_default_python_${bundle.target}
- serverless: true
libraries:
- notebook:

View File

@@ -0,0 +1,15 @@
trace $CLI bundle init default-python --config-file ./input.json --output-dir output
cd output/my_default_python
trace $CLI bundle validate -t dev
trace $CLI bundle validate -t prod
# Do not affect this repository's git behaviour #2318
mv .gitignore out.gitignore
cd ../../
# Calculate the difference from the serverless template
diff.py $TESTDIR/../serverless/output output/ > out.compare-vs-serverless.diff
rm -fr output

View File

@@ -0,0 +1,22 @@
>>> [CLI] bundle init default-python --config-file [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/input.json --output-dir output
Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL]
✨ Your new project has been created in the 'my_default_python' directory!
Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.
>>> diff.py [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output output/
--- [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml
+++ output/my_default_python/resources/my_default_python.pipeline.yml
@@ -4,6 +4,5 @@
my_default_python_pipeline:
name: my_default_python_pipeline
- ## Catalog is required for serverless compute
- catalog: main
+ catalog: customcatalog
target: my_default_python_${bundle.target}
serverless: true

View File

@@ -0,0 +1,4 @@
trace $CLI bundle init default-python --config-file $TESTDIR/../serverless/input.json --output-dir output
mv output/my_default_python/.gitignore output/my_default_python/out.gitignore
trace diff.py $TESTDIR/../serverless/output output/
rm -fr output

View File

@@ -0,0 +1,8 @@
[[Server]]
Pattern = "GET /api/2.1/unity-catalog/current-metastore-assignment"
Response.Body = '{"default_catalog_name": "customcatalog"}'
[[Repls]]
# windows fix
Old = '\\'
New = '/'

View File

@@ -0,0 +1,7 @@
{
"project_name": "my_default_python",
"include_notebook": "yes",
"include_dlt": "yes",
"include_python": "yes",
"serverless": "yes"
}

View File

@@ -0,0 +1,30 @@
>>> [CLI] bundle init default-python --config-file ./input.json --output-dir output
Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL]
✨ Your new project has been created in the 'my_default_python' directory!
Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.
>>> [CLI] bundle validate -t dev
Name: my_default_python
Target: dev
Workspace:
Host: [DATABRICKS_URL]
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev
Validation OK!
>>> [CLI] bundle validate -t prod
Name: my_default_python
Target: prod
Workspace:
Host: [DATABRICKS_URL]
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/prod
Validation OK!

View File

@@ -16,7 +16,6 @@ resources:
tasks:
- task_key: notebook_task
job_cluster_key: job_cluster
notebook_task:
notebook_path: ../src/notebook.ipynb
@@ -29,22 +28,18 @@ resources:
- task_key: main_task
depends_on:
- task_key: refresh_pipeline
job_cluster_key: job_cluster
environment_key: default
python_wheel_task:
package_name: my_default_python
entry_point: main
libraries:
# By default we just include the .whl file generated for the my_default_python package.
# See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
# for more information on how to add other libraries.
- whl: ../dist/*.whl
job_clusters:
- job_cluster_key: job_cluster
new_cluster:
spark_version: 15.4.x-scala2.12
node_type_id: i3.xlarge
data_security_mode: SINGLE_USER
autoscale:
min_workers: 1
max_workers: 4
# A list of task execution environment specifications that can be referenced by tasks of this job.
environments:
- environment_key: default
# Full documentation of this spec can be found at:
# https://docs.databricks.com/api/workspace/jobs/create#environments-spec
spec:
client: "1"
dependencies:
- ../dist/*.whl

View File

@@ -3,8 +3,10 @@ resources:
pipelines:
my_default_python_pipeline:
name: my_default_python_pipeline
## Catalog is required for serverless compute
catalog: main
target: my_default_python_${bundle.target}
serverless: true
libraries:
- notebook:
path: ../src/dlt_pipeline.ipynb

View File

@@ -20,6 +20,12 @@ var testUser = iam.User{
UserName: "tester@databricks.com",
}
var testMetastore = catalog.MetastoreAssignment{
DefaultCatalogName: "hive_metastore",
MetastoreId: "120efa64-9b68-46ba-be38-f319458430d2",
WorkspaceId: 470123456789500,
}
func AddHandlers(server *testserver.Server) {
server.Handle("GET", "/api/2.0/policies/clusters/list", func(req testserver.Request) any {
return compute.ListPoliciesResponse{
@@ -106,9 +112,7 @@ func AddHandlers(server *testserver.Server) {
})
server.Handle("GET", "/api/2.1/unity-catalog/current-metastore-assignment", func(req testserver.Request) any {
return catalog.MetastoreAssignment{
DefaultCatalogName: "main",
}
return testMetastore
})
server.Handle("GET", "/api/2.0/permissions/directories/{objectId}", func(req testserver.Request) any {

View File

@@ -116,11 +116,13 @@ func TestBuiltinPythonTemplateValid(t *testing.T) {
for _, includeDlt := range options {
for _, includePython := range options {
for _, isServicePrincipal := range []bool{true, false} {
for _, serverless := range options {
config := map[string]any{
"project_name": "my_project",
"include_notebook": includeNotebook,
"include_dlt": includeDlt,
"include_python": includePython,
"serverless": serverless,
}
tempDir := t.TempDir()
assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir)
@@ -128,6 +130,7 @@ func TestBuiltinPythonTemplateValid(t *testing.T) {
}
}
}
}
// Test prod mode + build
config := map[string]any{
@@ -135,6 +138,7 @@ func TestBuiltinPythonTemplateValid(t *testing.T) {
"include_notebook": "yes",
"include_dlt": "yes",
"include_python": "yes",
"serverless": "yes",
}
isServicePrincipal = false
build = true

View File

@@ -29,6 +29,13 @@
"enum": ["yes", "no"],
"description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
"order": 4
},
"serverless": {
"type": "string",
"default": "no",
"enum": ["yes", "no"],
"description": "Use serverless compute",
"order": 5
}
},
"success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."

View File

@@ -4,6 +4,7 @@
{{if and (eq .include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}}
# This job runs {{.project_name}}_pipeline on a schedule.
{{end -}}
{{$with_serverless := (eq .serverless "yes") -}}
resources:
jobs:
@@ -29,7 +30,8 @@ resources:
tasks:
{{- if eq .include_notebook "yes" }}
- task_key: notebook_task
job_cluster_key: job_cluster
{{- if not $with_serverless}}
job_cluster_key: job_cluster{{end}}
notebook_task:
notebook_path: ../src/notebook.ipynb
{{end -}}
@@ -52,18 +54,34 @@ resources:
depends_on:
- task_key: notebook_task
{{end}}
job_cluster_key: job_cluster
{{- if $with_serverless }}
environment_key: default
{{- else }}
job_cluster_key: job_cluster{{end}}
python_wheel_task:
package_name: {{.project_name}}
entry_point: main
{{- if not $with_serverless }}
libraries:
# By default we just include the .whl file generated for the {{.project_name}} package.
# See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
# for more information on how to add other libraries.
- whl: ../dist/*.whl
{{- end -}}
{{else}}
{{- end}}
{{if $with_serverless}}
# A list of task execution environment specifications that can be referenced by tasks of this job.
environments:
- environment_key: default
# Full documentation of this spec can be found at:
# https://docs.databricks.com/api/workspace/jobs/create#environments-spec
spec:
client: "1"
dependencies:
- ../dist/*.whl
{{ else }}
{{end -}}
job_clusters:
- job_cluster_key: job_cluster
new_cluster:
@@ -73,3 +91,4 @@ resources:
autoscale:
min_workers: 1
max_workers: 4
{{end -}}
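
Rendered with serverless = "yes" (and the other prompts at their defaults), the compute-related parts of the generated job come out roughly as below; the my_default_python_job resource key is assumed from the template defaults rather than copied verbatim:

```yaml
resources:
  jobs:
    my_default_python_job:
      tasks:
        - task_key: notebook_task
          # no job_cluster_key when serverless is enabled
          notebook_task:
            notebook_path: ../src/notebook.ipynb
        - task_key: main_task
          depends_on:
            - task_key: refresh_pipeline
          environment_key: default
          python_wheel_task:
            package_name: my_default_python
            entry_point: main

      # A list of task execution environment specifications that can be referenced by tasks of this job.
      environments:
        - environment_key: default
          spec:
            client: "1"
            dependencies:
              - ../dist/*.whl
```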

View File

@@ -1,15 +1,22 @@
{{$with_serverless := (eq .serverless "yes") -}}
# The main pipeline for {{.project_name}}
resources:
pipelines:
{{.project_name}}_pipeline:
name: {{.project_name}}_pipeline
{{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}}
{{- if $with_serverless }}
## Catalog is required for serverless compute
catalog: main{{else}}
## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
# catalog: catalog_name
# catalog: catalog_name{{end}}
{{- else}}
catalog: {{default_catalog}}
{{- end}}
target: {{.project_name}}_${bundle.target}
{{- if $with_serverless }}
serverless: true
{{- end}}
libraries:
- notebook:
path: ../src/dlt_pipeline.ipynb
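
For reference, with serverless = "yes" and a workspace default catalog of hive_metastore (the test server default added above), this template renders to roughly:

```yaml
# The main pipeline for my_default_python
resources:
  pipelines:
    my_default_python_pipeline:
      name: my_default_python_pipeline
      ## Catalog is required for serverless compute
      catalog: main
      target: my_default_python_${bundle.target}
      serverless: true
      libraries:
        - notebook:
            path: ../src/dlt_pipeline.ipynb
```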