Support serverless in default-python

This adds serverless compute support to the default-python template. A new
use_serverless option ("yes", "no", "auto") is introduced; with "auto", the CLI
queries the workspace settings API to detect whether serverless is enabled and
falls back to classic job clusters when detection fails.
Denis Bilenko 2025-02-04 13:12:06 +01:00
parent ce7e64062b
commit dcee3db389
16 changed files with 263 additions and 19 deletions
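
Before the per-file diffs: the template resolves the new option to a single
boolean before rendering. A condensed sketch of that resolution (hypothetical
standalone code, not part of this commit; resolveServerless and the stubbed
detector are illustrative names):

package main

import "fmt"

// detect stands in for the is_serverless_supported helper added in this
// commit, which calls the settings API and falls back to false on any error.
func resolveServerless(useServerless string, detect func() bool) bool {
	switch useServerless {
	case "yes":
		return true
	case "auto":
		return detect()
	default: // "no"
		return false
	}
}

func main() {
	stub := func() bool { return true } // workspace reports serverless enabled
	fmt.Println(resolveServerless("auto", stub)) // true
	fmt.Println(resolveServerless("no", stub))   // false
}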

View File

@@ -0,0 +1,6 @@
{
  "project_name": "my_default_python",
  "include_notebook": "yes",
  "include_dlt": "yes",
  "include_python": "yes"
}

View File

@ -0,0 +1,27 @@
--- [TESTROOT]/bundle/templates/default-python-classic/../default-python/output/my_default_python/resources/my_default_python.job.yml
+++ output/my_default_python/resources/my_default_python.job.yml
@@ -17,4 +17,5 @@
tasks:
- task_key: notebook_task
+ job_cluster_key: job_cluster
notebook_task:
notebook_path: ../src/notebook.ipynb
@@ -29,5 +30,5 @@
depends_on:
- task_key: refresh_pipeline
- environment_key: default
+ job_cluster_key: job_cluster
python_wheel_task:
package_name: my_default_python
@@ -39,3 +40,10 @@
- whl: ../dist/*.whl
-
+ job_clusters:
+ - job_cluster_key: job_cluster
+ new_cluster:
+ spark_version: 15.4.x-scala2.12
+ node_type_id: i3.xlarge
+ autoscale:
+ min_workers: 1
+ max_workers: 4

View File

@@ -0,0 +1,30 @@

>>> [CLI] bundle init default-python --config-file ./input.json --output-dir output

Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL]

✨ Your new project has been created in the 'my_default_python' directory!

Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.

>>> [CLI] bundle validate -t dev
Name: my_default_python
Target: dev
Workspace:
  Host: [DATABRICKS_URL]
  User: [USERNAME]
  Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev

Validation OK!

>>> [CLI] bundle validate -t prod
Name: my_default_python
Target: prod
Workspace:
  Host: [DATABRICKS_URL]
  User: [USERNAME]
  Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/prod

Validation OK!

View File

@ -0,0 +1,15 @@
trace $CLI bundle init default-python --config-file ./input.json --output-dir output
cd output/my_default_python
trace $CLI bundle validate -t dev
trace $CLI bundle validate -t prod
# Do not affect this repository's git behaviour #2318
mv .gitignore out.gitignore
cd ../../
# Calculate the difference from the standard (serverless) template
diff.py $TESTDIR/../default-python/output output/ > out.compare-vs-serverless.diff
rm -fr output

View File

@@ -0,0 +1,7 @@
# At the moment, there are many differences across different envs w.r.t. catalog use, node type and so on.
LocalOnly = true
SaveRepls = true

[[Server]]
Pattern = "GET /api/2.0/settings-api/workspace/{workspaceId}/serverless_job_nb"
Response.Body = '{}'
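
With the endpoint stubbed to return an empty body, preview_enablement_val.enabled
decodes as false, so is_serverless_supported reports false and the generated
project falls back to classic job clusters — which is exactly what the recorded
out.compare-vs-serverless.diff above captures.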

View File

@@ -0,0 +1,6 @@
{
  "project_name": "my_default_python",
  "include_notebook": "yes",
  "include_dlt": "yes",
  "include_python": "yes"
}

View File

@@ -0,0 +1,27 @@
--- [TESTROOT]/bundle/templates/default-python-errorserverless/../default-python/output/my_default_python/resources/my_default_python.job.yml
+++ output/my_default_python/resources/my_default_python.job.yml
@@ -17,4 +17,5 @@
       tasks:
         - task_key: notebook_task
+          job_cluster_key: job_cluster
           notebook_task:
             notebook_path: ../src/notebook.ipynb
@@ -29,5 +30,5 @@
           depends_on:
             - task_key: refresh_pipeline
-          environment_key: default
+          job_cluster_key: job_cluster
           python_wheel_task:
             package_name: my_default_python
@@ -39,3 +40,10 @@
             - whl: ../dist/*.whl
-
+      job_clusters:
+        - job_cluster_key: job_cluster
+          new_cluster:
+            spark_version: 15.4.x-scala2.12
+            node_type_id: i3.xlarge
+            autoscale:
+              min_workers: 1
+              max_workers: 4

View File

@@ -0,0 +1,31 @@

>>> [CLI] bundle init default-python --config-file ./input.json --output-dir output

Welcome to the default Python template for Databricks Asset Bundles!
Warn: Failed to detect if serverless is supported: /api/2.0/settings-api/workspace/900800700600/serverless_job_nb failed: Internal Server Error
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL]

✨ Your new project has been created in the 'my_default_python' directory!

Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.

>>> [CLI] bundle validate -t dev
Name: my_default_python
Target: dev
Workspace:
  Host: [DATABRICKS_URL]
  User: [USERNAME]
  Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev

Validation OK!

>>> [CLI] bundle validate -t prod
Name: my_default_python
Target: prod
Workspace:
  Host: [DATABRICKS_URL]
  User: [USERNAME]
  Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/prod

Validation OK!

View File

@@ -0,0 +1,15 @@
trace $CLI bundle init default-python --config-file ./input.json --output-dir output

cd output/my_default_python
trace $CLI bundle validate -t dev
trace $CLI bundle validate -t prod

# Rename .gitignore so it does not affect this repository's git behaviour (#2318)
mv .gitignore out.gitignore

cd ../../

# Calculate the difference from the standard (serverless) template
diff.py $TESTDIR/../default-python/output output/ > out.compare-vs-serverless.diff

rm -fr output

View File

@@ -0,0 +1,7 @@
# At the moment, there are many differences across different envs w.r.t. catalog use, node type and so on.
LocalOnly = true
SaveRepls = true

[[Server]]
Pattern = "GET /api/2.0/settings-api/workspace/{workspaceId}/serverless_job_nb"
Response.StatusCode = 500
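
Here the stub returns HTTP 500, producing the "Failed to detect if serverless
is supported" warning visible in the recorded output above. Since
defaultServerlessSupported is false, the template again falls back to classic
job clusters rather than aborting init.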

View File

@@ -16,7 +16,6 @@ resources:
       tasks:
         - task_key: notebook_task
-          job_cluster_key: job_cluster
           notebook_task:
             notebook_path: ../src/notebook.ipynb
@@ -29,7 +28,7 @@ resources:
         - task_key: main_task
           depends_on:
             - task_key: refresh_pipeline
-          job_cluster_key: job_cluster
+          environment_key: default
           python_wheel_task:
             package_name: my_default_python
             entry_point: main
@@ -39,11 +38,4 @@ resources:
           # for more information on how to add other libraries.
             - whl: ../dist/*.whl
-      job_clusters:
-        - job_cluster_key: job_cluster
-          new_cluster:
-            spark_version: 15.4.x-scala2.12
-            node_type_id: i3.xlarge
-            autoscale:
-              min_workers: 1
-              max_workers: 4

View File

@@ -1,2 +1,6 @@
 # At the moment, there are many differences across different envs w.r.t. catalog use, node type and so on.
 LocalOnly = true
+
+[[Server]]
+Pattern = "GET /api/2.0/settings-api/workspace/{workspaceId}/serverless_job_nb"
+Response.Body = '''{"setting": {"value": {"preview_enablement_val": {"enabled": true}}}}'''

View File

@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"math/rand"
+	"net/http"
 	"net/url"
 	"os"
 	"regexp"
@@ -12,7 +13,10 @@ import (
 	"github.com/databricks/cli/cmd/root"
 	"github.com/databricks/cli/libs/iamutil"
 	"github.com/databricks/cli/libs/log"
+	"github.com/databricks/databricks-sdk-go"
+	"github.com/databricks/databricks-sdk-go/apierr"
+	"github.com/databricks/databricks-sdk-go/client"
 	"github.com/databricks/databricks-sdk-go/service/iam"
 	"github.com/google/uuid"
@@ -35,8 +39,11 @@ var (
 	cachedUser               *iam.User
 	cachedIsServicePrincipal *bool
 	cachedCatalog            *string
+	cachedIsServerless       *bool
 )

+const defaultServerlessSupported = false
+
 // UUID that is stable for the duration of the template execution. This can be used
 // to populate the `bundle.uuid` field in databricks.yml by template authors.
 //
@@ -44,6 +51,16 @@ var (
 // is run and can be used to attribute DBU revenue to bundle templates.
 var bundleUuid = uuid.New().String()

+type APISettingResponse struct {
+	Setting struct {
+		Value struct {
+			PreviewEnablementVal struct {
+				Enabled bool `json:"enabled"`
+			} `json:"preview_enablement_val"`
+		} `json:"value"`
+	} `json:"setting"`
+}
+
 func loadHelpers(ctx context.Context) template.FuncMap {
 	w := root.WorkspaceClient(ctx)
 	return template.FuncMap{
@@ -167,5 +184,43 @@ func loadHelpers(ctx context.Context) template.FuncMap {
 			cachedIsServicePrincipal = &result
 			return result, nil
 		},
+		"is_serverless_supported": func() bool {
+			if cachedIsServerless == nil {
+				result := isServerlessSupported(ctx, w)
+				cachedIsServerless = &result
+			}
+			return *cachedIsServerless
+		},
 	}
 }
+
+func isServerlessSupported(ctx context.Context, w *databricks.WorkspaceClient) bool {
+	apiClient, err := client.New(w.Config)
+	if err != nil {
+		log.Warnf(ctx, "Failed to detect if serverless is supported: cannot create client: %s", err)
+		return defaultServerlessSupported
+	}
+
+	workspaceId, err := w.CurrentWorkspaceID(ctx)
+	if err != nil {
+		log.Warnf(ctx, "Failed to detect if serverless is supported: CurrentWorkspaceID() failed: %s", err)
+		return defaultServerlessSupported
+	}
+
+	apiEndpoint := fmt.Sprintf("/api/2.0/settings-api/workspace/%d/serverless_job_nb", workspaceId)
+
+	var response APISettingResponse
+	err = apiClient.Do(
+		ctx,
+		http.MethodGet,
+		apiEndpoint,
+		nil,
+		nil,
+		nil,
+		&response,
+	)
+	if err != nil {
+		log.Warnf(ctx, "Failed to detect if serverless is supported: %s failed: %s", apiEndpoint, err)
+		return defaultServerlessSupported
+	}
+
+	return response.Setting.Value.PreviewEnablementVal.Enabled
+}
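
As a side note, the nested response shape can be checked in isolation. A
minimal sketch (not part of the commit) that decodes the stubbed payload from
test.toml using the same struct shape as APISettingResponse above:

package main

import (
	"encoding/json"
	"fmt"
)

// Same shape as the APISettingResponse struct added above.
type APISettingResponse struct {
	Setting struct {
		Value struct {
			PreviewEnablementVal struct {
				Enabled bool `json:"enabled"`
			} `json:"preview_enablement_val"`
		} `json:"value"`
	} `json:"setting"`
}

func main() {
	// Payload used by the default-python acceptance test stub.
	payload := `{"setting": {"value": {"preview_enablement_val": {"enabled": true}}}}`
	var r APISettingResponse
	if err := json.Unmarshal([]byte(payload), &r); err != nil {
		panic(err)
	}
	fmt.Println(r.Setting.Value.PreviewEnablementVal.Enabled) // prints: true
}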

View File

@@ -116,11 +116,13 @@ func TestBuiltinPythonTemplateValid(t *testing.T) {
 	for _, includeDlt := range options {
 		for _, includePython := range options {
 			for _, isServicePrincipal := range []bool{true, false} {
+				for _, useServerless := range options {
 					config := map[string]any{
 						"project_name":     "my_project",
 						"include_notebook": includeNotebook,
 						"include_dlt":      includeDlt,
 						"include_python":   includePython,
+						"use_serverless":   useServerless,
 					}
 					tempDir := t.TempDir()
 					assertBuiltinTemplateValid(t, "default-python", config, "dev", isServicePrincipal, build, tempDir)
@@ -128,6 +130,7 @@ func TestBuiltinPythonTemplateValid(t *testing.T) {
+				}
 			}
 		}
 	}

 	// Test prod mode + build
 	config := map[string]any{
@@ -135,6 +138,7 @@ func TestBuiltinPythonTemplateValid(t *testing.T) {
 		"include_notebook": "yes",
 		"include_dlt":      "yes",
 		"include_python":   "yes",
+		"use_serverless":   "yes",
 	}
 	isServicePrincipal = false
 	build = true

View File

@@ -29,6 +29,14 @@
       "enum": ["yes", "no"],
       "description": "Include a stub (sample) Python package in '{{.project_name}}{{path_separator}}src'",
       "order": 4
-    }
+    },
+    "use_serverless": {
+      "type": "string",
+      "default": "auto",
+      "enum": ["yes", "no", "auto"],
+      "description": "Use serverless compute. If auto, serverless is enabled unless it is disabled at the workspace level.",
+      "order": 5,
+      "skip_prompt_if": {}
+    }
   },
   "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."

View File

@@ -4,6 +4,10 @@
 {{if and (eq .include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}}
 # This job runs {{.project_name}}_pipeline on a schedule.
 {{end -}}
+{{$with_serverless := (eq .use_serverless "yes") -}}
+{{if (eq .use_serverless "auto") -}}
+{{$with_serverless = is_serverless_supported -}}
+{{end -}}

 resources:
   jobs:
@@ -29,7 +33,8 @@ resources:
       tasks:
         {{- if eq .include_notebook "yes" }}
         - task_key: notebook_task
-          job_cluster_key: job_cluster
+          {{- if not $with_serverless}}
+          job_cluster_key: job_cluster{{end}}
           notebook_task:
             notebook_path: ../src/notebook.ipynb
         {{end -}}
@@ -52,7 +57,10 @@ resources:
           depends_on:
             - task_key: notebook_task
         {{end}}
-          job_cluster_key: job_cluster
+          {{- if $with_serverless }}
+          environment_key: default
+          {{- else }}
+          job_cluster_key: job_cluster{{end}}
           python_wheel_task:
             package_name: {{.project_name}}
             entry_point: main
@@ -64,6 +72,7 @@ resources:
       {{else}}
       {{end -}}
+      {{- if not $with_serverless -}}
       job_clusters:
         - job_cluster_key: job_cluster
           new_cluster:
@@ -72,3 +81,4 @@ resources:
             autoscale:
               min_workers: 1
               max_workers: 4
+{{- end}}
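
The $with_serverless pattern above relies on Go template variable reassignment
(:= to declare, = to update inside a nested block) plus the injected
is_serverless_supported helper. A runnable toy reproduction (the stubbed
FuncMap entry is an assumption standing in for the real helper):

package main

import (
	"os"
	"text/template"
)

func main() {
	// Stub: the real helper queries the workspace settings API.
	funcs := template.FuncMap{
		"is_serverless_supported": func() bool { return true },
	}
	tmpl := template.Must(template.New("job").Funcs(funcs).Parse(
		`{{$with_serverless := (eq .use_serverless "yes") -}}
{{if (eq .use_serverless "auto") -}}
{{$with_serverless = is_serverless_supported -}}
{{end -}}
serverless: {{$with_serverless}}
`))
	// "auto" defers to the helper, which reports true here.
	_ = tmpl.Execute(os.Stdout, map[string]string{"use_serverless": "auto"})
	// Output: serverless: true
}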