Merge remote-tracking branch 'origin' into async-logger-clean

Shreyas Goenka 2025-02-27 11:47:34 +01:00
commit a556d51164
137 changed files with 1657 additions and 327 deletions

View File

@ -1 +1 @@
c72c58f97b950fcb924a90ef164bcb10cfcd5ece
99f644e72261ef5ecf8d74db20f4b7a1e09723cc

View File

@ -179,7 +179,7 @@ func new{{.PascalName}}() *cobra.Command {
{{- $wait := and .Wait (and (not .IsCrudRead) (not (eq .SnakeName "get_run"))) -}}
{{- $hasRequiredArgs := and (not $hasIdPrompt) $hasPosArgs -}}
{{- $hasSingleRequiredRequestBodyFieldWithPrompt := and (and $hasIdPrompt $request) (eq 1 (len $request.RequiredRequestBodyFields)) -}}
{{- $onlyPathArgsRequiredAsPositionalArguments := and $request (eq (len .RequiredPositionalArguments) (len $request.RequiredPathFields)) -}}
{{- $onlyPathArgsRequiredAsPositionalArguments := and .Request (eq (len .RequiredPositionalArguments) (len .Request.RequiredPathFields)) -}}
{{- $hasDifferentArgsWithJsonFlag := and (not $onlyPathArgsRequiredAsPositionalArguments) (and $canUseJson (or $request.HasRequiredRequestBodyFields )) -}}
{{- $hasCustomArgHandler := or $hasRequiredArgs $hasDifferentArgsWithJsonFlag -}}
@ -218,12 +218,12 @@ func new{{.PascalName}}() *cobra.Command {
cmd.Args = func(cmd *cobra.Command, args []string) error {
{{- if $hasDifferentArgsWithJsonFlag }}
if cmd.Flags().Changed("json") {
err := root.ExactArgs({{len $request.RequiredPathFields}})(cmd, args)
err := root.ExactArgs({{len .Request.RequiredPathFields}})(cmd, args)
if err != nil {
{{- if eq 0 (len $request.RequiredPathFields) }}
{{- if eq 0 (len .Request.RequiredPathFields) }}
return fmt.Errorf("when --json flag is specified, no positional arguments are required. Provide{{- range $index, $field := $request.RequiredFields}}{{if $index}},{{end}} '{{$field.Name}}'{{end}} in your JSON input")
{{- else }}
return fmt.Errorf("when --json flag is specified, provide only{{- range $index, $field := $request.RequiredPathFields}}{{if $index}},{{end}} {{$field.ConstantName}}{{end}} as positional arguments. Provide{{- range $index, $field := $request.RequiredRequestBodyFields}}{{if $index}},{{end}} '{{$field.Name}}'{{end}} in your JSON input")
return fmt.Errorf("when --json flag is specified, provide only{{- range $index, $field := .Request.RequiredPathFields}}{{if $index}},{{end}} {{$field.ConstantName}}{{end}} as positional arguments. Provide{{- range $index, $field := $request.RequiredRequestBodyFields}}{{if $index}},{{end}} '{{$field.Name}}'{{end}} in your JSON input")
{{- end }}
}
return nil
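
The hunk above switches the generated argument validator from the `$request` template variable to the `.Request` field. As a hedged illustration of what templates like this emit, here is a minimal hand-written sketch of the pattern: when `--json` is passed, only path parameters stay positional and the request body comes from the JSON payload. The command and argument names (`update NAME DESCRIPTION`) are hypothetical, and stock `cobra.ExactArgs` stands in for the CLI's `root.ExactArgs` helper.

```go
// Hedged sketch, not generated output: argument validation that requires only the
// path argument when --json is set, and all required arguments otherwise.
package main

import (
	"fmt"

	"github.com/spf13/cobra"
)

func newUpdateCmd() *cobra.Command {
	var jsonInput string
	cmd := &cobra.Command{Use: "update NAME DESCRIPTION"}
	cmd.Flags().StringVar(&jsonInput, "json", "", "inline JSON or @path/to/file.json with the request body")

	cmd.Args = func(cmd *cobra.Command, args []string) error {
		if cmd.Flags().Changed("json") {
			// With --json, only the path parameter (NAME) stays positional;
			// the request body fields come from the JSON payload.
			if err := cobra.ExactArgs(1)(cmd, args); err != nil {
				return fmt.Errorf("when --json flag is specified, provide only NAME as a positional argument: %w", err)
			}
			return nil
		}
		// Without --json, all required fields are positional.
		return cobra.ExactArgs(2)(cmd, args)
	}

	cmd.RunE = func(cmd *cobra.Command, args []string) error {
		fmt.Println("update", args)
		return nil
	}
	return cmd
}

func main() {
	_ = newUpdateCmd().Execute()
}
```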

.gitignore (vendored)
View File

@ -25,6 +25,7 @@ coverage-acceptance.txt
__pycache__
*.pyc
.idea
.vscode/launch.json
.vscode/tasks.json

View File

@ -217,8 +217,12 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
}
cloudEnv := os.Getenv("CLOUD_ENV")
if config.LocalOnly && cloudEnv != "" {
t.Skipf("Disabled via LocalOnly setting in %s (CLOUD_ENV=%s)", configPath, cloudEnv)
if !isTruePtr(config.Local) && cloudEnv == "" {
t.Skipf("Disabled via Local setting in %s (CLOUD_ENV=%s)", configPath, cloudEnv)
}
if !isTruePtr(config.Cloud) && cloudEnv != "" {
t.Skipf("Disabled via Cloud setting in %s (CLOUD_ENV=%s)", configPath, cloudEnv)
}
var tmpDir string
@ -263,9 +267,9 @@ func runTest(t *testing.T, dir, coverDir string, repls testdiff.ReplacementsCont
databricksLocalHost := os.Getenv("DATABRICKS_DEFAULT_HOST")
if len(config.Server) > 0 || config.RecordRequests {
if len(config.Server) > 0 || isTruePtr(config.RecordRequests) {
server = testserver.New(t)
if config.RecordRequests {
if isTruePtr(config.RecordRequests) {
requestsPath := filepath.Join(tmpDir, "out.requests.txt")
server.RecordRequestsCallback = func(request *testserver.Request) {
req := getLoggedRequest(request, config.IncludeRequestHeaders)
@ -703,3 +707,7 @@ func filterHeaders(h http.Header, includedHeaders []string) http.Header {
}
return headers
}
func isTruePtr(value *bool) bool {
return value != nil && *value
}
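
For context, a minimal sketch (not part of the commit) of the skip decision that the new `Local`/`Cloud` pointer flags encode, extracted into a pure function so the semantics are easy to read; default handling for unset fields in the real runner may differ.

```go
// Hedged sketch of the two t.Skipf checks above: a test runs locally only if Local
// is true, and runs against a cloud env only if Cloud is true.
package main

import "fmt"

func isTruePtr(value *bool) bool {
	return value != nil && *value
}

// shouldRun reports whether a test runs given its Local/Cloud settings and whether
// CLOUD_ENV is set for this run.
func shouldRun(local, cloud *bool, cloudEnvSet bool) bool {
	if !isTruePtr(local) && !cloudEnvSet {
		return false // skipped via Local setting
	}
	if !isTruePtr(cloud) && cloudEnvSet {
		return false // skipped via Cloud setting
	}
	return true
}

func main() {
	t, f := true, false
	fmt.Println(shouldRun(&t, &f, false)) // local run, Cloud = false: runs
	fmt.Println(shouldRun(&t, &f, true))  // cloud run, Cloud = false: skipped
	fmt.Println(shouldRun(&f, &t, true))  // cloud-only test during a cloud run: runs
}
```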

View File

@ -11,9 +11,9 @@
>>> errcode [CLI] current-user me -t dev -p DEFAULT
"[USERNAME]"
=== Inside the bundle, profile flag not matching bundle host. Badness: should use profile from flag instead and not fail
=== Inside the bundle, profile flag not matching bundle host. Should use profile from the flag and not the bundle.
>>> errcode [CLI] current-user me -p profile_name
Error: cannot resolve bundle auth configuration: config host mismatch: profile uses host https://non-existing-subdomain.databricks.com, but CLI configured to use [DATABRICKS_TARGET]
Error: Get "https://non-existing-subdomain.databricks.com/api/2.0/preview/scim/v2/Me": (redacted)
Exit code: 1
@ -23,6 +23,65 @@ Error: cannot resolve bundle auth configuration: config host mismatch: profile u
Exit code: 1
=== Bundle commands load bundle configuration when no flags, validation OK
>>> errcode [CLI] bundle validate
Name: test-auth
Target: dev
Workspace:
Host: [DATABRICKS_TARGET]
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/test-auth/dev
Validation OK!
=== Bundle commands load bundle configuration with -t flag, validation OK
>>> errcode [CLI] bundle validate -t dev
Name: test-auth
Target: dev
Workspace:
Host: [DATABRICKS_TARGET]
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/test-auth/dev
Validation OK!
=== Bundle commands load bundle configuration with -p flag, validation not OK (profile host doesn't match bundle host)
>>> errcode [CLI] bundle validate -p profile_name
Error: cannot resolve bundle auth configuration: config host mismatch: profile uses host https://non-existing-subdomain.databricks.com, but CLI configured to use [DATABRICKS_TARGET]
Name: test-auth
Target: dev
Workspace:
Host: [DATABRICKS_TARGET]
Found 1 error
Exit code: 1
=== Bundle commands load bundle configuration with -t and -p flags, validation OK (profile host matches bundle host)
>>> errcode [CLI] bundle validate -t dev -p DEFAULT
Name: test-auth
Target: dev
Workspace:
Host: [DATABRICKS_TARGET]
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/test-auth/dev
Validation OK!
=== Bundle commands load bundle configuration with -t and -p flags, validation not OK (profile host doesn't match bundle host)
>>> errcode [CLI] bundle validate -t prod -p DEFAULT
Error: cannot resolve bundle auth configuration: config host mismatch: profile uses host [DATABRICKS_TARGET], but CLI configured to use https://bar.com
Name: test-auth
Target: prod
Workspace:
Host: https://bar.com
Found 1 error
Exit code: 1
=== Outside the bundle, no flags
>>> errcode [CLI] current-user me
"[USERNAME]"

View File

@ -15,12 +15,27 @@ trace errcode $CLI current-user me -t dev | jq .userName
title "Inside the bundle, target and matching profile"
trace errcode $CLI current-user me -t dev -p DEFAULT | jq .userName
title "Inside the bundle, profile flag not matching bundle host. Badness: should use profile from flag instead and not fail"
title "Inside the bundle, profile flag not matching bundle host. Should use profile from the flag and not the bundle."
trace errcode $CLI current-user me -p profile_name | jq .userName
title "Inside the bundle, target and not matching profile"
trace errcode $CLI current-user me -t dev -p profile_name
title "Bundle commands load bundle configuration when no flags, validation OK"
trace errcode $CLI bundle validate
title "Bundle commands load bundle configuration with -t flag, validation OK"
trace errcode $CLI bundle validate -t dev
title "Bundle commands load bundle configuration with -p flag, validation not OK (profile host don't match bundle host)"
trace errcode $CLI bundle validate -p profile_name
title "Bundle commands load bundle configuration with -t and -p flag, validation OK (profile host match bundle host)"
trace errcode $CLI bundle validate -t dev -p DEFAULT
title "Bundle commands load bundle configuration with -t and -p flag, validation not OK (profile host don't match bundle host)"
trace errcode $CLI bundle validate -t prod -p DEFAULT
cd ..
export DATABRICKS_HOST=$host
title "Outside the bundle, no flags"

View File

@ -1,5 +1,3 @@
Badness = "When -p flag is used inside the bundle folder for any CLI commands, CLI use bundle host anyway instead of profile one"
# Some clouds have the DATABRICKS_HOST variable set up without the https:// prefix
# As a result, output is replaced with the DATABRICKS_URL variable instead of DATABRICKS_HOST
# This is a workaround to replace DATABRICKS_URL with DATABRICKS_HOST
@ -10,3 +8,7 @@ New='DATABRICKS_TARGET'
[[Repls]]
Old='DATABRICKS_URL'
New='DATABRICKS_TARGET'
[[Repls]]
Old='Get "https://non-existing-subdomain.databricks.com/api/2.0/preview/scim/v2/Me": .*'
New='Get "https://non-existing-subdomain.databricks.com/api/2.0/preview/scim/v2/Me": (redacted)'

View File

@ -1,5 +1,3 @@
LocalOnly = true
RecordRequests = true
IncludeRequestHeaders = ["Authorization", "User-Agent"]

View File

@ -43,8 +43,8 @@ def main():
elif f not in set1:
print(f"Only in {d2}: {f}")
else:
a = [replaceAll(patterns, x) for x in p1.read_text().splitlines(True)]
b = [replaceAll(patterns, x) for x in p2.read_text().splitlines(True)]
a = replaceAll(patterns, p1.read_text()).splitlines(True)
b = replaceAll(patterns, p2.read_text()).splitlines(True)
if a != b:
p1_str = p1.as_posix()
p2_str = p2.as_posix()

View File

@ -1,4 +1,4 @@
LocalOnly = true
Cloud = false
[[Repls]]
# The keys are unsorted and also vary per OS

View File

@ -1,4 +1,4 @@
LocalOnly = true # This test needs to run against stubbed Databricks API
Cloud = false # This test needs to run against stubbed Databricks API
[[Server]]
Pattern = "GET /api/2.1/jobs/get"

View File

@ -0,0 +1 @@
Cloud = false

View File

@ -0,0 +1 @@
.databricks

View File

@ -0,0 +1,27 @@
bundle:
name: maven
resources:
jobs:
testjob:
name: test-job
tasks:
- task_key: dbt
spark_jar_task:
main_class_name: com.databricks.example.Main
libraries:
- maven:
coordinates: org.jsoup:jsoup:1.7.2
new_cluster:
spark_version: 15.4.x-scala2.12
node_type_id: i3.xlarge
data_security_mode: SINGLE_USER
num_workers: 0
spark_conf:
spark.master: "local[*, 4]"
spark.databricks.cluster.profile: singleNode
custom_tags:
ResourceClass: SingleNode

View File

@ -0,0 +1,7 @@
[
{
"maven": {
"coordinates": "org.jsoup:jsoup:1.7.2"
}
}
]

View File

@ -0,0 +1,15 @@
>>> [CLI] bundle validate -o json
[
{
"maven": {
"coordinates": "org.jsoup:jsoup:1.7.2"
}
}
]
>>> [CLI] bundle deploy
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/maven/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!

View File

@ -0,0 +1,4 @@
trace $CLI bundle validate -o json | jq '.resources.jobs.testjob.tasks[0].libraries'
trace $CLI bundle deploy
cat out.requests.txt | jq 'select(.path == "/api/2.1/jobs/create")' | jq '.body.tasks[0].libraries' > out.job.libraries.txt
rm out.requests.txt

View File

@ -0,0 +1,5 @@
# We run this test only locally for now because we still need to figure out how to run
# bundle destroy from script.cleanup first.
Cloud = false
RecordRequests = true

View File

@ -0,0 +1 @@
.databricks

View File

@ -0,0 +1,32 @@
bundle:
name: pypi
resources:
jobs:
testjob:
name: test-job
tasks:
- task_key: dbt
dbt_task:
project_directory: ./
profiles_directory: dbt_profiles/
commands:
- 'dbt deps --target=${bundle.target}'
- 'dbt seed --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"'
- 'dbt run --target=${bundle.target} --vars "{ dev_schema: ${workspace.current_user.short_name} }"'
libraries:
- pypi:
package: dbt-databricks>=1.8.0,<2.0.0
new_cluster:
spark_version: 15.4.x-scala2.12
node_type_id: i3.xlarge
data_security_mode: SINGLE_USER
num_workers: 0
spark_conf:
spark.master: "local[*, 4]"
spark.databricks.cluster.profile: singleNode
custom_tags:
ResourceClass: SingleNode

View File

@ -0,0 +1,7 @@
[
{
"pypi": {
"package": "dbt-databricks>=1.8.0,<2.0.0"
}
}
]

View File

@ -0,0 +1,15 @@
>>> [CLI] bundle validate -o json
[
{
"pypi": {
"package": "dbt-databricks>=1.8.0,<2.0.0"
}
}
]
>>> [CLI] bundle deploy
Uploading bundle files to /Workspace/Users/[USERNAME]/.bundle/pypi/default/files...
Deploying resources...
Updating deployment state...
Deployment complete!

View File

@ -0,0 +1,4 @@
trace $CLI bundle validate -o json | jq '.resources.jobs.testjob.tasks[0].libraries'
trace $CLI bundle deploy
cat out.requests.txt | jq 'select(.path == "/api/2.1/jobs/create")' | jq '.body.tasks[0].libraries' > out.job.libraries.txt
rm out.requests.txt

View File

@ -0,0 +1,5 @@
# We run this test only locally for now because we still need to figure out how to run
# bundle destroy from script.cleanup first.
Cloud = false
RecordRequests = true

View File

@ -1,5 +1,4 @@
Badness = '''(minor) error message is not great: executing "" at <user_name>: error calling user_name:'''
LocalOnly = true
[[Server]]
Pattern = "GET /api/2.0/preview/scim/v2/Me"

View File

@ -1 +0,0 @@
LocalOnly = true

View File

@ -1,2 +1 @@
# This tests template machinery; by default there is no need to check against the cloud.
LocalOnly = true
Cloud = false

View File

@ -1,2 +1,15 @@
# DABs
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
scratch/**
!scratch/README.md
.databricks
# dbt
target/
dbt_packages/
dbt_modules/
logs/

View File

@ -2,5 +2,6 @@
"project_name": "my_default_python",
"include_notebook": "yes",
"include_dlt": "yes",
"include_python": "yes"
"include_python": "yes",
"serverless": "no"
}

View File

@ -0,0 +1,54 @@
--- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.job.yml
+++ output/my_default_python/resources/my_default_python.job.yml
@@ -17,4 +17,5 @@
tasks:
- task_key: notebook_task
+ job_cluster_key: job_cluster
notebook_task:
notebook_path: ../src/notebook.ipynb
@@ -29,17 +30,21 @@
depends_on:
- task_key: refresh_pipeline
- environment_key: default
+ job_cluster_key: job_cluster
python_wheel_task:
package_name: my_default_python
entry_point: main
+ libraries:
+ # By default we just include the .whl file generated for the my_default_python package.
+ # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+ # for more information on how to add other libraries.
+ - whl: ../dist/*.whl
- # A list of task execution environment specifications that can be referenced by tasks of this job.
- environments:
- - environment_key: default
-
- # Full documentation of this spec can be found at:
- # https://docs.databricks.com/api/workspace/jobs/create#environments-spec
- spec:
- client: "1"
- dependencies:
- - ../dist/*.whl
+ job_clusters:
+ - job_cluster_key: job_cluster
+ new_cluster:
+ spark_version: 15.4.x-scala2.12
+ node_type_id: i3.xlarge
+ data_security_mode: SINGLE_USER
+ autoscale:
+ min_workers: 1
+ max_workers: 4
--- [TESTROOT]/bundle/templates/default-python/classic/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml
+++ output/my_default_python/resources/my_default_python.pipeline.yml
@@ -4,8 +4,7 @@
my_default_python_pipeline:
name: my_default_python_pipeline
- ## Catalog is required for serverless compute
- catalog: main
+ ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
+ # catalog: catalog_name
target: my_default_python_${bundle.target}
- serverless: true
libraries:
- notebook:

View File

@ -44,6 +44,7 @@ resources:
new_cluster:
spark_version: 15.4.x-scala2.12
node_type_id: i3.xlarge
data_security_mode: SINGLE_USER
autoscale:
min_workers: 1
max_workers: 4

View File

@ -0,0 +1,14 @@
# The main pipeline for my_default_python
resources:
pipelines:
my_default_python_pipeline:
name: my_default_python_pipeline
## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
# catalog: catalog_name
target: my_default_python_${bundle.target}
libraries:
- notebook:
path: ../src/dlt_pipeline.ipynb
configuration:
bundle.sourcePath: ${workspace.file_path}/src

View File

@ -0,0 +1,13 @@
trace $CLI bundle init default-python --config-file ./input.json --output-dir output
cd output/my_default_python
trace $CLI bundle validate -t dev
trace $CLI bundle validate -t prod
# Do not affect this repository's git behaviour #2318
mv .gitignore out.gitignore
cd ../../
# Calculate the difference from the serverless template
diff.py $TESTDIR/../serverless/output output/ > out.compare-vs-serverless.diff

View File

@ -0,0 +1,22 @@
>>> [CLI] bundle init default-python --config-file [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/input.json --output-dir output
Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL]
✨ Your new project has been created in the 'my_default_python' directory!
Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.
>>> diff.py [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output output/
--- [TESTROOT]/bundle/templates/default-python/serverless-customcatalog/../serverless/output/my_default_python/resources/my_default_python.pipeline.yml
+++ output/my_default_python/resources/my_default_python.pipeline.yml
@@ -4,6 +4,5 @@
my_default_python_pipeline:
name: my_default_python_pipeline
- ## Catalog is required for serverless compute
- catalog: main
+ catalog: customcatalog
target: my_default_python_${bundle.target}
serverless: true

View File

@ -0,0 +1,4 @@
trace $CLI bundle init default-python --config-file $TESTDIR/../serverless/input.json --output-dir output
mv output/my_default_python/.gitignore output/my_default_python/out.gitignore
trace diff.py $TESTDIR/../serverless/output output/
rm -fr output

View File

@ -0,0 +1,8 @@
[[Server]]
Pattern = "GET /api/2.1/unity-catalog/current-metastore-assignment"
Response.Body = '{"default_catalog_name": "customcatalog"}'
[[Repls]]
# windows fix
Old = '\\'
New = '/'

View File

@ -0,0 +1,7 @@
{
"project_name": "my_default_python",
"include_notebook": "yes",
"include_dlt": "yes",
"include_python": "yes",
"serverless": "yes"
}

View File

@ -0,0 +1,30 @@
>>> [CLI] bundle init default-python --config-file ./input.json --output-dir output
Welcome to the default Python template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_default_python/databricks.yml'): [DATABRICKS_URL]
✨ Your new project has been created in the 'my_default_python' directory!
Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.
>>> [CLI] bundle validate -t dev
Name: my_default_python
Target: dev
Workspace:
Host: [DATABRICKS_URL]
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/dev
Validation OK!
>>> [CLI] bundle validate -t prod
Name: my_default_python
Target: prod
Workspace:
Host: [DATABRICKS_URL]
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/my_default_python/prod
Validation OK!

View File

@ -0,0 +1,3 @@
# Typings for Pylance in Visual Studio Code
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
from databricks.sdk.runtime import *

View File

@ -0,0 +1,7 @@
{
"recommendations": [
"databricks.databricks",
"ms-python.vscode-pylance",
"redhat.vscode-yaml"
]
}

View File

@ -0,0 +1,16 @@
{
"python.analysis.stubPath": ".vscode",
"jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
"jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
"python.testing.pytestArgs": [
"."
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.analysis.extraPaths": ["src"],
"files.exclude": {
"**/*.egg-info": true,
"**/__pycache__": true,
".pytest_cache": true,
},
}

View File

@ -0,0 +1,49 @@
# my_default_python
The 'my_default_python' project was generated by using the default-python template.
## Getting started
1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html
2. Authenticate to your Databricks workspace, if you have not done so already:
```
$ databricks configure
```
3. To deploy a development copy of this project, type:
```
$ databricks bundle deploy --target dev
```
(Note that "dev" is the default target, so the `--target` parameter
is optional here.)
This deploys everything that's defined for this project.
For example, the default template would deploy a job called
`[dev yourname] my_default_python_job` to your workspace.
You can find that job by opening your workspace and clicking on **Workflows**.
4. Similarly, to deploy a production copy, type:
```
$ databricks bundle deploy --target prod
```
Note that the default job from the template has a schedule that runs every day
(defined in resources/my_default_python.job.yml). The schedule
is paused when deploying in development mode (see
https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
5. To run a job or pipeline, use the "run" command:
```
$ databricks bundle run
```
6. Optionally, install the Databricks extension for Visual Studio Code for local development from
https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your
virtual environment and set up Databricks Connect for running unit tests locally.
When not using these tools, consult your development environment's documentation
and/or the documentation for Databricks Connect for manually setting up your environment
(https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html).
7. For documentation on the Databricks asset bundles format used
for this project, and for CI/CD configuration, see
https://docs.databricks.com/dev-tools/bundles/index.html.

View File

@ -0,0 +1,29 @@
# This is a Databricks asset bundle definition for my_default_python.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
name: my_default_python
uuid: [UUID]
include:
- resources/*.yml
targets:
dev:
# The default target uses 'mode: development' to create a development copy.
# - Deployed resources get prefixed with '[dev my_user_name]'
# - Any job schedules and triggers are paused by default.
# See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
mode: development
default: true
workspace:
host: [DATABRICKS_URL]
prod:
mode: production
workspace:
host: [DATABRICKS_URL]
# We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy.
root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target}
permissions:
- user_name: [USERNAME]
level: CAN_MANAGE

View File

@ -0,0 +1,22 @@
# Fixtures
This folder is reserved for fixtures, such as CSV files.
Below is an example of how to load fixtures as a data frame:
```
import pandas as pd
import os
def get_absolute_path(*relative_parts):
if 'dbutils' in globals():
base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore
path = os.path.normpath(os.path.join(base_dir, *relative_parts))
return path if path.startswith("/Workspace") else "/Workspace" + path
else:
return os.path.join(*relative_parts)
csv_file = get_absolute_path("..", "fixtures", "mycsv.csv")
df = pd.read_csv(csv_file)
display(df)
```

View File

@ -0,0 +1,8 @@
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
scratch/**
!scratch/README.md

View File

@ -0,0 +1,3 @@
[pytest]
testpaths = tests
pythonpath = src

View File

@ -0,0 +1,29 @@
## requirements-dev.txt: dependencies for local development.
##
## For defining dependencies used by jobs in Databricks Workflows, see
## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
## Add code completion support for DLT
databricks-dlt
## pytest is the default package used for testing
pytest
## Dependencies for building wheel files
setuptools
wheel
## databricks-connect can be used to run parts of this project locally.
## See https://docs.databricks.com/dev-tools/databricks-connect.html.
##
## databricks-connect is automatically installed if you're using Databricks
## extension for Visual Studio Code
## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html).
##
## To manually install databricks-connect, either follow the instructions
## at https://docs.databricks.com/dev-tools/databricks-connect.html
## to install the package system-wide. Or uncomment the line below to install a
## version of db-connect that corresponds to the Databricks Runtime version used
## for this project.
#
# databricks-connect>=15.4,<15.5

View File

@ -0,0 +1,45 @@
# The main job for my_default_python.
resources:
jobs:
my_default_python_job:
name: my_default_python_job
trigger:
# Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
periodic:
interval: 1
unit: DAYS
email_notifications:
on_failure:
- [USERNAME]
tasks:
- task_key: notebook_task
notebook_task:
notebook_path: ../src/notebook.ipynb
- task_key: refresh_pipeline
depends_on:
- task_key: notebook_task
pipeline_task:
pipeline_id: ${resources.pipelines.my_default_python_pipeline.id}
- task_key: main_task
depends_on:
- task_key: refresh_pipeline
environment_key: default
python_wheel_task:
package_name: my_default_python
entry_point: main
# A list of task execution environment specifications that can be referenced by tasks of this job.
environments:
- environment_key: default
# Full documentation of this spec can be found at:
# https://docs.databricks.com/api/workspace/jobs/create#environments-spec
spec:
client: "1"
dependencies:
- ../dist/*.whl

View File

@ -3,8 +3,10 @@ resources:
pipelines:
my_default_python_pipeline:
name: my_default_python_pipeline
## Catalog is required for serverless compute
catalog: main
target: my_default_python_${bundle.target}
serverless: true
libraries:
- notebook:
path: ../src/dlt_pipeline.ipynb

View File

@ -0,0 +1,4 @@
# scratch
This folder is reserved for personal, exploratory notebooks.
By default these are not committed to Git, as 'scratch' is listed in .gitignore.

View File

@ -0,0 +1,61 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "[UUID]",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"import sys\n",
"\n",
"sys.path.append(\"../src\")\n",
"from my_default_python import main\n",
"\n",
"main.get_taxis(spark).show(10)"
]
}
],
"metadata": {
"application/vnd.databricks.v1+notebook": {
"dashboards": [],
"language": "python",
"notebookMetadata": {
"pythonIndentUnit": 2
},
"notebookName": "ipynb-notebook",
"widgets": {}
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,41 @@
"""
setup.py configuration script describing how to build and package this project.
This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the my_default_python project.
"""
from setuptools import setup, find_packages
import sys
sys.path.append("./src")
import datetime
import my_default_python
local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")
setup(
name="my_default_python",
# We use a timestamp as the local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers)
# to ensure that changes to the wheel package are picked up when it is used on all-purpose clusters
version=my_default_python.__version__ + "+" + local_version,
url="https://databricks.com",
author="[USERNAME]",
description="wheel file based on my_default_python/src",
packages=find_packages(where="./src"),
package_dir={"": "src"},
entry_points={
"packages": [
"main=my_default_python.main:main",
],
},
install_requires=[
# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
"setuptools"
],
)

View File

@ -0,0 +1,90 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "[UUID]",
"showTitle": false,
"title": ""
}
},
"source": [
"# DLT pipeline\n",
"\n",
"This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml."
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "[UUID]",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"# Import DLT and src/my_default_python\n",
"import dlt\n",
"import sys\n",
"\n",
"sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n",
"from pyspark.sql.functions import expr\n",
"from my_default_python import main"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "[UUID]",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"@dlt.view\n",
"def taxi_raw():\n",
" return main.get_taxis(spark)\n",
"\n",
"\n",
"@dlt.table\n",
"def filtered_taxis():\n",
" return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))"
]
}
],
"metadata": {
"application/vnd.databricks.v1+notebook": {
"dashboards": [],
"language": "python",
"notebookMetadata": {
"pythonIndentUnit": 2
},
"notebookName": "dlt_pipeline",
"widgets": {}
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,25 @@
from pyspark.sql import SparkSession, DataFrame
def get_taxis(spark: SparkSession) -> DataFrame:
return spark.read.table("samples.nyctaxi.trips")
# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
try:
from databricks.connect import DatabricksSession
return DatabricksSession.builder.getOrCreate()
except ImportError:
return SparkSession.builder.getOrCreate()
def main():
get_taxis(get_spark()).show(5)
if __name__ == "__main__":
main()

View File

@ -0,0 +1,75 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {},
"inputWidgets": {},
"nuid": "[UUID]",
"showTitle": false,
"title": ""
}
},
"source": [
"# Default notebook\n",
"\n",
"This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"application/vnd.databricks.v1+cell": {
"cellMetadata": {
"byteLimit": 2048000,
"rowLimit": 10000
},
"inputWidgets": {},
"nuid": "[UUID]",
"showTitle": false,
"title": ""
}
},
"outputs": [],
"source": [
"from my_default_python import main\n",
"\n",
"main.get_taxis(spark).show(10)"
]
}
],
"metadata": {
"application/vnd.databricks.v1+notebook": {
"dashboards": [],
"language": "python",
"notebookMetadata": {
"pythonIndentUnit": 2
},
"notebookName": "notebook",
"widgets": {}
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 0
}

View File

@ -0,0 +1,6 @@
from my_default_python.main import get_taxis, get_spark
def test_main():
taxis = get_taxis(get_spark())
assert taxis.count() > 5

View File

@ -1,2 +1,8 @@
.databricks
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
scratch/**
!scratch/README.md

View File

@ -34,6 +34,7 @@ Warning: Ignoring Databricks CLI version constraint for development build. Requi
"max_workers": 4,
"min_workers": 1
},
"data_security_mode": "SINGLE_USER",
"node_type_id": "i3.xlarge",
"spark_version": "15.4.x-scala2.12"
}

View File

@ -56,6 +56,7 @@ my_jobs_as_code_job = Job.from_dict(
"new_cluster": {
"spark_version": "15.4.x-scala2.12",
"node_type_id": "i3.xlarge",
"data_security_mode": "SINGLE_USER",
"autoscale": {
"min_workers": 1,
"max_workers": 4,

View File

@ -1,2 +1,2 @@
# At the moment, there are many differences across environments w.r.t. catalog use, node type, and so on.
LocalOnly = true
Cloud = false

View File

@ -0,0 +1,2 @@
Local = true
Cloud = true

View File

@ -0,0 +1,37 @@
bundle:
name: trampoline_warning_message
targets:
dev:
mode: development
default: true
prod:
resources:
clusters:
interactive_cluster:
spark_version: 14.2.x-cpu-ml-scala2.12
resources:
clusters:
interactive_cluster:
cluster_name: jobs-as-code-all-purpose-cluster
spark_version: 12.2.x-cpu-ml-scala2.12
node_type_id: r5d.8xlarge
autotermination_minutes: 30
autoscale:
min_workers: 1
max_workers: 1
driver_node_type_id: r5d.8xlarge
jobs:
whl:
name: "wheel-job"
tasks:
- task_key: test_task
python_wheel_task:
package_name: my_package
entry_point: my_module.my_function
existing_cluster_id: ${resources.clusters.interactive_cluster.id}
libraries:
- whl: ./dist/*.whl

View File

@ -0,0 +1,22 @@
>>> errcode [CLI] bundle validate -t dev
Error: Python wheel tasks require compute with DBR 13.3+ to include local libraries. Please change your cluster configuration or use the experimental 'python_wheel_wrapper' setting. See https://docs.databricks.com/dev-tools/bundles/python-wheel.html for more information.
Name: trampoline_warning_message
Target: dev
Workspace:
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/trampoline_warning_message/dev
Found 1 error
Exit code: 1
>>> errcode [CLI] bundle validate -t prod
Name: trampoline_warning_message
Target: prod
Workspace:
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/trampoline_warning_message/prod
Validation OK!

View File

@ -0,0 +1,2 @@
trace errcode $CLI bundle validate -t dev
trace errcode $CLI bundle validate -t prod

View File

@ -0,0 +1,20 @@
bundle:
name: trampoline_warning_message_with_new_spark
targets:
dev:
mode: development
default: true
resources:
jobs:
whl:
name: "wheel-job"
tasks:
- task_key: test_task
python_wheel_task:
package_name: my_package
entry_point: my_module.my_function
existing_cluster_id: "some-test-cluster-id"
libraries:
- whl: ./dist/*.whl

View File

@ -0,0 +1,9 @@
>>> errcode [CLI] bundle validate
Name: trampoline_warning_message_with_new_spark
Target: dev
Workspace:
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/trampoline_warning_message_with_new_spark/dev
Validation OK!

View File

@ -0,0 +1 @@
trace errcode $CLI bundle validate

View File

@ -0,0 +1,16 @@
# Since we use an existing cluster id that is not available in cloud envs, we need to stub the request
# and run this test only locally
Cloud = false
[[Server]]
Pattern = "GET /api/2.1/clusters/get"
Response.Body = '''
{
"cluster_id": "some-cluster-id",
"state": "RUNNING",
"spark_version": "13.3.x-scala2.12",
"node_type_id": "Standard_DS3_v2",
"driver_node_type_id": "Standard_DS3_v2",
"cluster_name": "some-cluster-name",
"spark_context_id": 12345
}'''

View File

@ -0,0 +1,20 @@
bundle:
name: trampoline_warning_message_with_old_spark
targets:
dev:
mode: development
default: true
resources:
jobs:
whl:
name: "wheel-job"
tasks:
- task_key: test_task
python_wheel_task:
package_name: my_package
entry_point: my_module.my_function
existing_cluster_id: "some-test-cluster-id"
libraries:
- whl: ./dist/*.whl

View File

@ -0,0 +1,13 @@
>>> errcode [CLI] bundle validate
Error: Python wheel tasks require compute with DBR 13.3+ to include local libraries. Please change your cluster configuration or use the experimental 'python_wheel_wrapper' setting. See https://docs.databricks.com/dev-tools/bundles/python-wheel.html for more information.
Name: trampoline_warning_message_with_old_spark
Target: dev
Workspace:
User: [USERNAME]
Path: /Workspace/Users/[USERNAME]/.bundle/trampoline_warning_message_with_old_spark/dev
Found 1 error
Exit code: 1

View File

@ -0,0 +1 @@
trace errcode $CLI bundle validate

View File

@ -0,0 +1,16 @@
# Since we use an existing cluster id that is not available in cloud envs, we need to stub the request
# and run this test only locally
Cloud = false
[[Server]]
Pattern = "GET /api/2.1/clusters/get"
Response.Body = '''
{
"cluster_id": "some-cluster-id",
"state": "RUNNING",
"spark_version": "7.3.x-scala2.12",
"node_type_id": "Standard_DS3_v2",
"driver_node_type_id": "Standard_DS3_v2",
"cluster_name": "some-cluster-name",
"spark_context_id": 12345
}'''

View File

@ -1,3 +1,3 @@
# The tests here are intended to test variable interpolation via "bundle validate".
# Even though "bundle validate" makes a few API calls, that is not the focus here.
LocalOnly = true
Cloud = false

View File

@ -0,0 +1,14 @@
{
"description": "My app description.",
"resources": [
{
"name": "api-key",
"description": "API key for external service.",
"secret": {
"scope": "my-scope",
"key": "my-key",
"permission": "READ"
}
}
]
}

View File

@ -0,0 +1,19 @@
{
"method": "PATCH",
"path": "/api/2.0/apps/test-name",
"body": {
"description": "My app description.",
"name": "",
"resources": [
{
"description": "API key for external service.",
"name": "api-key",
"secret": {
"key": "my-key",
"permission": "READ",
"scope": "my-scope"
}
}
]
}
}

View File

@ -0,0 +1,49 @@
=== Apps update with correct input
>>> [CLI] apps update test-name --json @input.json
{
"app_status": {
"message":"Application is running.",
"state":"DEPLOYING"
},
"compute_status": {
"message":"App compute is active.",
"state":"ERROR"
},
"description":"My app description.",
"id":"12345",
"name":"test-name",
"resources": [
{
"description":"API key for external service.",
"name":"api-key",
"secret": {
"key":"my-key",
"permission":"READ",
"scope":"my-scope"
}
}
],
"url":"test-name-123.cloud.databricksapps.com"
}
=== Apps update with missing parameter
>>> [CLI] apps update --json @input.json
Error: accepts 1 arg(s), received 0
Usage:
databricks apps update NAME [flags]
Flags:
--description string The description of the app.
-h, --help help for update
--json JSON either inline JSON string or @path/to/file.json with request body (default JSON (0 bytes))
Global Flags:
--debug enable debug logging
-o, --output type output type: text or json (default text)
-p, --profile string ~/.databrickscfg profile
-t, --target string bundle target to use (if applicable)
Exit code: 1

View File

@ -0,0 +1,5 @@
title "Apps update with correct input"
trace $CLI apps update test-name --json @input.json
title "Apps update with missing parameter"
trace $CLI apps update --json @input.json

View File

@ -0,0 +1,30 @@
RecordRequests = true
[[Server]]
Pattern = "PATCH /api/2.0/apps/test-name"
Response.Body = '''
{
"name": "test-name",
"description": "My app description.",
"compute_status": {
"state": "ERROR",
"message": "App compute is active."
},
"app_status": {
"state": "DEPLOYING",
"message": "Application is running."
},
"url": "test-name-123.cloud.databricksapps.com",
"resources": [
{
"name": "api-key",
"description": "API key for external service.",
"secret": {
"scope": "my-scope",
"key": "my-key",
"permission": "READ"
}
}
],
"id": "12345"
}'''

View File

@ -18,14 +18,17 @@ const configFilename = "test.toml"
type TestConfig struct {
// Place to describe what's wrong with this test. Does not affect how the test is run.
Badness string
Badness *string
// Which OSes the test is enabled on. Each string is compared against runtime.GOOS.
// If absent, default to true.
GOOS map[string]bool
// If true, do not run this test against cloud environment
LocalOnly bool
// If true, run this test when running locally with a testserver
Local *bool
// If true, run this test when running with cloud env configured
Cloud *bool
// List of additional replacements to apply on this test.
// Old is a regexp, New is a replacement expression.
@ -44,7 +47,7 @@ type TestConfig struct {
// Record the requests made to the server and write them as output to
// out.requests.txt
RecordRequests bool
RecordRequests *bool
// List of request headers to include when recording requests.
IncludeRequestHeaders []string
@ -102,7 +105,7 @@ func LoadConfig(t *testing.T, dir string) (TestConfig, string) {
for _, cfgName := range configs[1:] {
cfg := DoLoadConfig(t, cfgName)
err := mergo.Merge(&result, cfg, mergo.WithOverride, mergo.WithAppendSlice)
err := mergo.Merge(&result, cfg, mergo.WithOverride, mergo.WithoutDereference, mergo.WithAppendSlice)
if err != nil {
t.Fatalf("Error during config merge: %s: %s", cfgName, err)
}
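
A minimal sketch (not from the repository) of why the boolean options become pointers and why `mergo.WithoutDereference` is added: with `*bool` fields and this option, an explicitly set `Cloud = false` in a more specific test.toml can override an inherited `true`, while an unset (nil) field leaves the inherited value untouched. The `dario.cat/mergo` import path is an assumption.

```go
// Hedged sketch: pointer booleans let merged test configs distinguish "unset" (nil)
// from an explicit "false".
package main

import (
	"fmt"

	"dario.cat/mergo" // assumed import path for the mergo library
)

type testConfig struct {
	Local *bool
	Cloud *bool
}

func isTruePtr(value *bool) bool {
	return value != nil && *value
}

func main() {
	t, f := true, false

	// Outer test.toml enables both; a more specific test.toml only sets Cloud = false.
	merged := testConfig{Local: &t, Cloud: &t}
	override := testConfig{Cloud: &f}

	// WithoutDereference treats the pointers as values: the non-nil Cloud pointer
	// overrides, while the nil Local pointer leaves the inherited value alone.
	if err := mergo.Merge(&merged, override, mergo.WithOverride, mergo.WithoutDereference); err != nil {
		panic(err)
	}

	fmt.Println("Local:", isTruePtr(merged.Local)) // expected: true  (inherited)
	fmt.Println("Cloud:", isTruePtr(merged.Cloud)) // expected: false (explicitly overridden)
}
```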

View File

@ -1,4 +1,3 @@
LocalOnly = true
RecordRequests = true
[[Server]]

Some files were not shown because too many files have changed in this diff.