mirror of https://github.com/databricks/cli.git
Make the default `databricks bundle init` template more self-explanatory (#796)
This makes the default-python template more self-explanatory and adds a few other tweaks for a better out-of-the-box experience.
commit 0c1516c4ba
parent 757d5efe8d
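For context, the out-of-the-box flow this template targets looks roughly like the following. This is an illustrative sketch only: `my_project` stands in for whatever `project_name` the user enters during initialization, and the `_job` suffix follows the naming used in the template's README further down in this diff.

```
$ databricks bundle init default-python
$ databricks bundle deploy
$ databricks bundle run my_project_job
```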
@@ -41,6 +41,7 @@ func assertBuiltinTemplateValid(t *testing.T, settings map[string]any, target st

    templatePath, err := prepareBuiltinTemplates("default-python", tempDir)
    require.NoError(t, err)
+   libraryPath := filepath.Join(templatePath, "library")

    w := &databricks.WorkspaceClient{
        Config: &workspaceConfig.Config{Host: "https://myhost.com"},
@@ -52,7 +53,7 @@ func assertBuiltinTemplateValid(t *testing.T, settings map[string]any, target st
    ctx = root.SetWorkspaceClient(ctx, w)
    helpers := loadHelpers(ctx)

-   renderer, err := newRenderer(ctx, settings, helpers, templatePath, "./testdata/template-in-path/library", tempDir)
+   renderer, err := newRenderer(ctx, settings, helpers, templatePath, libraryPath, tempDir)
    require.NoError(t, err)

    // Evaluate template
@@ -0,0 +1,7 @@
+{{define "latest_lts_dbr_version" -}}
+    13.3.x-scala2.12
+{{- end}}
+
+{{define "latest_lts_db_connect_version_spec" -}}
+    >=13.3,<13.4
+{{- end}}
@@ -8,7 +8,10 @@
    ],
    "python.testing.unittestEnabled": false,
    "python.testing.pytestEnabled": true,
+   "python.analysis.extraPaths": ["src"],
    "files.exclude": {
-       "**/*.egg-info": true
+       "**/*.egg-info": true,
+       "**/__pycache__": true,
+       ".pytest_cache": true,
    },
}
@@ -30,7 +30,7 @@ The '{{.project_name}}' project was generated by using the default-python templa

5. To run a job or pipeline, use the "run" command:
   ```
-  $ databricks bundle run {{.project_name}}_job
+  $ databricks bundle run
   ```

6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
@@ -0,0 +1,22 @@
+## requirements-dev.txt: dependencies for local development.
+##
+## For defining dependencies used by jobs in Databricks Workflows, see
+## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+
+## pytest is the default package used for testing
+pytest
+
+## databricks-connect can be used to run parts of this project locally.
+## See https://docs.databricks.com/dev-tools/databricks-connect.html.
+##
+## databricks-connect is automatically installed if you're using Databricks
+## extension for Visual Studio Code
+## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html).
+##
+## To manually install databricks-connect, either follow the instructions
+## at https://docs.databricks.com/dev-tools/databricks-connect.html
+## to install the package system-wide. Or uncomment the line below to install a
+## version of db-connect that corresponds to the Databricks Runtime version used
+## for this project.
+#
+# databricks-connect{{template "latest_lts_db_connect_version_spec"}}
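As a concrete illustration of the comments in this new file: a hypothetical local setup would install the dev dependencies and, optionally, a databricks-connect version matching the pinned spec that `latest_lts_db_connect_version_spec` expands to:

```
$ pip install -r requirements-dev.txt
$ pip install "databricks-connect>=13.3,<13.4"
```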
@@ -49,6 +49,9 @@ resources:
            package_name: {{.project_name}}
            entry_point: main
          libraries:
+           # By default we just include the .whl file generated for the {{.project_name}} package.
+           # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+           # for more information on how to add other libraries.
            - whl: ../dist/*.whl

{{else}}
@@ -56,8 +59,7 @@ resources:
      job_clusters:
        - job_cluster_key: job_cluster
          new_cluster:
-           {{- /* we should always use an LTS version in our templates */}}
-           spark_version: 13.3.x-scala2.12
+           spark_version: {{template "latest_lts_dbr_version"}}
            node_type_id: {{smallest_node_type}}
            autoscale:
              min_workers: 1
@@ -1,8 +1,9 @@
"""
-Setup script for {{.project_name}}.
+setup.py configuration script describing how to build and package this project.

-This script packages and distributes the associated wheel file(s).
-Source code is in ./src/. Run 'python setup.py sdist bdist_wheel' to build.
+This file is primarily used by the setuptools library and typically should not
+be executed directly. See README.md for how to deploy, test, and run
+the {{.project_name}} project.
"""
from setuptools import setup, find_packages

@@ -16,9 +17,18 @@ setup(
    version={{.project_name}}.__version__,
    url="https://databricks.com",
    author="{{user_name}}",
-   description="my test wheel",
+   description="wheel file based on {{.project_name}}/src",
    packages=find_packages(where='./src'),
    package_dir={'': 'src'},
-   entry_points={"entry_points": "main={{.project_name}}.main:main"},
-   install_requires=["setuptools"],
+   entry_points={
+       "packages": [
+           "main={{.project_name}}.main:main"
+       ]
+   },
+   install_requires=[
+       # Dependencies in case the output wheel file is used as a library dependency.
+       # For defining dependencies, when this package is used in Databricks, see:
+       # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+       "setuptools"
+   ],
)
@@ -1,5 +1,21 @@
+from databricks.connect import DatabricksSession
+from pyspark.sql import SparkSession
from {{.project_name}} import main

+# Create a new Databricks Connect session. If this fails,
+# check that you have configured Databricks Connect correctly.
+# See https://docs.databricks.com/dev-tools/databricks-connect.html.
+{{/*
+The below works around a problematic error message from Databricks Connect.
+The standard SparkSession is supported in all configurations (workspace, IDE,
+all runtime versions, CLI). But on the CLI it currently gives a confusing
+error message if SPARK_REMOTE is not set. We can't directly use
+DatabricksSession.builder in main.py, so we're re-assigning it here so
+everything works out of the box, even for CLI users who don't set SPARK_REMOTE.
+*/}}
+SparkSession.builder = DatabricksSession.builder
+SparkSession.builder.getOrCreate()
+
def test_main():
    taxis = main.get_taxis()
    assert taxis.count() > 5
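With databricks-connect configured as described in the requirements-dev.txt comments above, this test is intended to run locally through pytest. A minimal invocation, assuming it is issued from the generated project root where pytest can discover the test files on its own:

```
$ pytest
```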
|
||||
|
|
Loading…
Reference in New Issue