mirror of https://github.com/databricks/cli.git
Support serverless mode in default-python template (explicit prompt) (#2377)
## Changes

- Add a 'serverless' prompt to the default-python template (the default is currently set to "no").
- This is a simplified version of https://github.com/databricks/cli/pull/2348 with the 'auto' functionality removed.

## Tests

- Split the default-python acceptance tests into default-python/classic, default-python/serverless, and default-python/serverless-customcatalog.
- Manually checked that "bundle init default-python" with serverless=yes can be deployed and run on dogfood and the test env.
parent df001dcdfe
commit 03f2ff5a39
@@ -2,5 +2,6 @@
     "project_name": "my_default_python",
     "include_notebook": "yes",
     "include_dlt": "yes",
-    "include_python": "yes"
+    "include_python": "yes",
+    "serverless": "no"
 }
@@ -0,0 +1,54 @@
--- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.job.yml
+++ output/my_default_python/resources/my_default_python.job.yml
@@ -17,4 +17,5 @@
       tasks:
         - task_key: notebook_task
+          job_cluster_key: job_cluster
           notebook_task:
             notebook_path: ../src/notebook.ipynb
@@ -29,17 +30,21 @@
           depends_on:
             - task_key: refresh_pipeline
-          environment_key: default
+          job_cluster_key: job_cluster
           python_wheel_task:
             package_name: my_default_python
             entry_point: main
+          libraries:
+            # By default we just include the .whl file generated for the my_default_python package.
+            # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+            # for more information on how to add other libraries.
+            - whl: ../dist/*.whl
 
-      # A list of task execution environment specifications that can be referenced by tasks of this job.
-      environments:
-        - environment_key: default
-
-          # Full documentation of this spec can be found at:
-          # https://docs.databricks.com/api/workspace/jobs/create#environments-spec
-          spec:
-            client: "1"
-            dependencies:
-              - ../dist/*.whl
+      job_clusters:
+        - job_cluster_key: job_cluster
+          new_cluster:
+            spark_version: 15.4.x-scala2.12
+            node_type_id: i3.xlarge
+            data_security_mode: SINGLE_USER
+            autoscale:
+              min_workers: 1
+              max_workers: 4
--- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml
+++ output/my_default_python/resources/my_default_python.pipeline.yml
@@ -4,8 +4,7 @@
     my_default_python_pipeline:
       name: my_default_python_pipeline
-      ## Catalog is required for serverless compute
-      catalog: main
+      ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
+      # catalog: catalog_name
       target: my_default_python_${bundle.target}
-      serverless: true
       libraries:
         - notebook:
@@ -0,0 +1,15 @@
trace $CLI bundle init default-python --config-file ./input.json --output-dir output

cd output/my_default_python
trace $CLI bundle validate -t dev
trace $CLI bundle validate -t prod

# Do not affect this repository's git behaviour #2318
mv .gitignore out.gitignore

cd ../../

# Calculate the difference from the serverless template
diff.py $TESTDIR/../serverless/output output/ > out.compare-vs-serverless.diff

rm -fr output
@@ -0,0 +1,22 @@

>>> [CLI] bundle init default-python --config-file [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/input.json --output-dir output

Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL]

✨ Your new project has been created in the 'my_default_python' directory!

Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.

>>> diff.py [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output output/
--- [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml
+++ output/my_default_python/resources/my_default_python.pipeline.yml
@@ -4,6 +4,5 @@
     my_default_python_pipeline:
       name: my_default_python_pipeline
-      ## Catalog is required for serverless compute
-      catalog: main
+      catalog: customcatalog
       target: my_default_python_${bundle.target}
       serverless: true
@@ -0,0 +1,4 @@
trace $CLI bundle init default-python --config-file $TESTDIR/../serverless/input.json --output-dir output
mv output/my_default_python/.gitignore output/my_default_python/out.gitignore
trace diff.py $TESTDIR/../serverless/output output/
rm -fr output
@@ -0,0 +1,8 @@
[[Server]]
Pattern = "GET /api/2.1/unity-catalog/current-metastore-assignment"
Response.Body = '{"default_catalog_name": "customcatalog"}'

[[Repls]]
# windows fix
Old = '\\'
New = '/'
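The `[[Repls]]` stanza above keeps the golden output stable across platforms by rewriting backslashes to forward slashes before comparison. A hedged sketch of the same normalization (the acceptance runner applies `[[Repls]]` as regex replacements over captured output; the `normalize` helper here is just an illustrative stand-in):

```go
package main

import (
	"fmt"
	"strings"
)

// Minimal sketch of what the [[Repls]] stanza does: matches of Old ('\\',
// i.e. a single backslash) are rewritten to New ('/') in test output before
// it is compared to the golden files, so Windows path separators never
// cause spurious diffs.
func normalize(s string) string {
	return strings.ReplaceAll(s, `\`, "/")
}

func main() {
	fmt.Println(normalize(`output\my_default_python\resources`))
	// Output: output/my_default_python/resources
}
```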
@@ -0,0 +1,7 @@
{
    "project_name": "my_default_python",
    "include_notebook": "yes",
    "include_dlt": "yes",
    "include_python": "yes",
    "serverless": "yes"
}
@@ -0,0 +1,30 @@

>>> [CLI] bundle init default-python --config-file ./input.json --output-dir output

Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL]

✨ Your new project has been created in the 'my_default_python' directory!

Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.

>>> [CLI] bundle validate -t dev
Name: my_default_python
Target: dev
Workspace:
  Host: [DATABRICKS_URL]
  User: [USERNAME]
  Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev

Validation OK!

>>> [CLI] bundle validate -t prod
Name: my_default_python
Target: prod
Workspace:
  Host: [DATABRICKS_URL]
  User: [USERNAME]
  Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/prod

Validation OK!
@@ -16,7 +16,6 @@ resources:
 
       tasks:
         - task_key: notebook_task
-          job_cluster_key: job_cluster
           notebook_task:
             notebook_path: ../src/notebook.ipynb
 
@@ -29,22 +28,18 @@ resources:
         - task_key: main_task
           depends_on:
             - task_key: refresh_pipeline
-          job_cluster_key: job_cluster
+          environment_key: default
           python_wheel_task:
             package_name: my_default_python
             entry_point: main
-          libraries:
-            # By default we just include the .whl file generated for the my_default_python package.
-            # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
-            # for more information on how to add other libraries.
-            - whl: ../dist/*.whl
 
-      job_clusters:
-        - job_cluster_key: job_cluster
-          new_cluster:
-            spark_version: 15.4.x-scala2.12
-            node_type_id: i3.xlarge
-            data_security_mode: SINGLE_USER
-            autoscale:
-              min_workers: 1
-              max_workers: 4
+      # A list of task execution environment specifications that can be referenced by tasks of this job.
+      environments:
+        - environment_key: default
+
+          # Full documentation of this spec can be found at:
+          # https://docs.databricks.com/api/workspace/jobs/create#environments-spec
+          spec:
+            client: "1"
+            dependencies:
+              - ../dist/*.whl
@@ -3,8 +3,10 @@ resources:
   pipelines:
     my_default_python_pipeline:
       name: my_default_python_pipeline
+      ## Catalog is required for serverless compute
       catalog: main
       target: my_default_python_${bundle.target}
+      serverless: true
       libraries:
         - notebook:
             path: ../src/dlt_pipeline.ipynb
@@ -20,6 +20,12 @@ var testUser = iam.User{
     UserName: "tester@databricks.com",
 }
 
+var testMetastore = catalog.MetastoreAssignment{
+    DefaultCatalogName: "hive_metastore",
+    MetastoreId:        "120efa64-9b68-46ba-be38-f319458430d2",
+    WorkspaceId:        470123456789500,
+}
+
 func AddHandlers(server *testserver.Server) {
     server.Handle("GET", "/api/2.0/policies/clusters/list", func(req testserver.Request) any {
         return compute.ListPoliciesResponse{
@@ -106,9 +112,7 @@ func AddHandlers(server *testserver.Server) {
     })
 
     server.Handle("GET", "/api/2.1/unity-catalog/current-metastore-assignment", func(req testserver.Request) any {
-        return catalog.MetastoreAssignment{
-            DefaultCatalogName: "main",
-        }
+        return testMetastore
     })
 
     server.Handle("GET", "/api/2.0/permissions/directories/{objectId}", func(req testserver.Request) any {
@@ -116,14 +116,17 @@ func TestBuiltinPythonTemplateValid(t *testing.T) {
     for _, includeDlt := range options {
         for _, includePython := range options {
             for _, isServicePrincipal := range []bool{true, false} {
-                config := map[string]any{
-                    "project_name":     "my_project",
-                    "include_notebook": includeNotebook,
-                    "include_dlt":      includeDlt,
-                    "include_python":   includePython,
+                for _, serverless := range options {
+                    config := map[string]any{
+                        "project_name":     "my_project",
+                        "include_notebook": includeNotebook,
+                        "include_dlt":      includeDlt,
+                        "include_python":   includePython,
+                        "serverless":       serverless,
+                    }
+                    tempDir := t.TempDir()
+                    assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir)
                 }
-                tempDir := t.TempDir()
-                assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir)
             }
         }
     }
@@ -135,6 +138,7 @@ func TestBuiltinPythonTemplateValid(t *testing.T) {
         "include_notebook": "yes",
         "include_dlt":      "yes",
         "include_python":   "yes",
+        "serverless":       "yes",
     }
     isServicePrincipal = false
     build = true
@@ -29,6 +29,13 @@
       "enum": ["yes", "no"],
       "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
       "order": 4
+    },
+    "serverless": {
+      "type": "string",
+      "default": "no",
+      "enum": ["yes", "no"],
+      "description": "Use serverless compute",
+      "order": 5
     }
   },
   "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
@@ -4,6 +4,7 @@
 {{if and (eq .include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}}
 # This job runs {{.project_name}}_pipeline on a schedule.
 {{end -}}
+{{$with_serverless := (eq .serverless "yes") -}}
 
 resources:
   jobs:
@@ -29,7 +30,8 @@ resources:
       tasks:
         {{- if eq .include_notebook "yes" }}
         - task_key: notebook_task
-          job_cluster_key: job_cluster
+          {{- if not $with_serverless}}
+          job_cluster_key: job_cluster{{end}}
           notebook_task:
             notebook_path: ../src/notebook.ipynb
         {{end -}}
@@ -52,18 +54,34 @@ resources:
           depends_on:
             - task_key: notebook_task
           {{end}}
-          job_cluster_key: job_cluster
+          {{- if $with_serverless }}
+          environment_key: default
+          {{- else }}
+          job_cluster_key: job_cluster{{end}}
           python_wheel_task:
             package_name: {{.project_name}}
             entry_point: main
+          {{- if not $with_serverless }}
           libraries:
             # By default we just include the .whl file generated for the {{.project_name}} package.
             # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
             # for more information on how to add other libraries.
             - whl: ../dist/*.whl
+          {{- end -}}
-      {{else}}
-      {{end -}}
+      {{else}}
+      {{- end}}
+{{if $with_serverless}}
+      # A list of task execution environment specifications that can be referenced by tasks of this job.
+      environments:
+        - environment_key: default
 
+          # Full documentation of this spec can be found at:
+          # https://docs.databricks.com/api/workspace/jobs/create#environments-spec
+          spec:
+            client: "1"
+            dependencies:
+              - ../dist/*.whl
+{{ else }}
       job_clusters:
         - job_cluster_key: job_cluster
           new_cluster:
|
@ -73,3 +91,4 @@ resources:
|
|||
autoscale:
|
||||
min_workers: 1
|
||||
max_workers: 4
|
||||
{{end -}}
|
||||
|
|
|
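The net effect of this template change: a single `$with_serverless` flag selects either the `environments`/`environment_key` serverless shape or the `job_clusters`/`job_cluster_key` classic shape. Below is a minimal standalone sketch of the mechanism, not the CLI's template engine or the full template, using only Go's `text/template` (which bundle templates build on); the trimmed `{{- ... }}` delimiters are what keep the untaken branch from leaving blank lines:

```go
package main

import (
	"os"
	"text/template"
)

// Minimal sketch, not the CLI's template engine: reproduces the
// $with_serverless branching used in the job template above.
const jobTmpl = `{{$with_serverless := (eq .serverless "yes") -}}
tasks:
  - task_key: main_task
{{- if $with_serverless }}
    environment_key: default
{{- else }}
    job_cluster_key: job_cluster
{{- end }}
`

func main() {
	t := template.Must(template.New("job").Parse(jobTmpl))
	for _, serverless := range []string{"yes", "no"} {
		// "yes" renders environment_key, "no" renders job_cluster_key,
		// with no stray blank lines from the untaken branch.
		if err := t.Execute(os.Stdout, map[string]string{"serverless": serverless}); err != nil {
			panic(err)
		}
	}
}
```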
@@ -1,15 +1,22 @@
+{{$with_serverless := (eq .serverless "yes") -}}
 # The main pipeline for {{.project_name}}
 resources:
   pipelines:
     {{.project_name}}_pipeline:
       name: {{.project_name}}_pipeline
       {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}}
+      {{- if $with_serverless }}
+      ## Catalog is required for serverless compute
+      catalog: main{{else}}
       ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
-      # catalog: catalog_name
+      # catalog: catalog_name{{end}}
       {{- else}}
       catalog: {{default_catalog}}
       {{- end}}
       target: {{.project_name}}_${bundle.target}
+      {{- if $with_serverless }}
+      serverless: true
+      {{- end}}
       libraries:
         - notebook:
             path: ../src/dlt_pipeline.ipynb
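The catalog logic above has three outcomes: an explicit `catalog: {{default_catalog}}` when the workspace reports a real default catalog, a mandatory `catalog: main` in serverless mode when it does not (serverless pipelines require Unity Catalog), and a commented-out hint otherwise. A minimal sketch under stated assumptions: `default_catalog` is swapped for a stand-in FuncMap helper here, whereas in the real template it is a built-in helper resolved from the workspace's metastore assignment, which is exactly what the test server's `DefaultCatalogName` and the customcatalog `[[Server]]` override exercise:

```go
package main

import (
	"os"
	"text/template"
)

// Minimal sketch of the pipeline template's catalog logic; not the CLI's
// template engine, and default_catalog is a hypothetical stand-in helper.
const pipelineTmpl = `{{$with_serverless := (eq .serverless "yes") -}}
name: {{.project_name}}_pipeline
{{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}}
{{- if $with_serverless }}
## Catalog is required for serverless compute
catalog: main{{else}}
# catalog: catalog_name{{end}}
{{- else}}
catalog: {{default_catalog}}
{{- end}}
`

func render(defaultCatalog, serverless string) {
	funcs := template.FuncMap{
		"default_catalog": func() string { return defaultCatalog },
	}
	t := template.Must(template.New("pipeline").Funcs(funcs).Parse(pipelineTmpl))
	data := map[string]string{"project_name": "my_default_python", "serverless": serverless}
	if err := t.Execute(os.Stdout, data); err != nil {
		panic(err)
	}
}

func main() {
	render("hive_metastore", "yes") // serverless fallback: renders "catalog: main"
	render("customcatalog", "yes")  // explicit default wins: renders "catalog: customcatalog"
}
```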