mirror of https://github.com/databricks/cli.git
# databricks bundle init template v1 (#686)
## Changes

This adds a built-in "default-python" template to the CLI. This is based on the new default-template support of https://github.com/databricks/cli/pull/685.

The goal here is to offer an experience where customers can simply type `databricks bundle init` to get a default template:

```
$ databricks bundle init
Template to use [default-python]: default-python
Unique name for this project [my_project]: my_project
✨ Successfully initialized template
```

The present template:

- [x] Works well with VS Code
- [x] Works well with the workspace
- [x] Works well with DB Connect
- [x] Uses minimal stubs rather than boilerplate-heavy examples

I'll have a followup with tests + DLT support.

---------

Co-authored-by: Andrew Nester <andrew.nester@databricks.com>
Co-authored-by: PaulCornellDB <paul.cornell@databricks.com>
Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
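For orientation, the files added in this diff lay out the generated project roughly as follows (paths that don't appear verbatim in the diff are inferred from cross-references such as `notebook_path: ../src/notebook.ipynb` and `testpaths = tests`):

```
{{.project_name}}/
├── .gitignore
├── .vscode/                   # __builtins__.pyi, extensions.json, settings.json
├── README.md
├── databricks.yml
├── fixtures/                  # README.md
├── pytest.ini
├── resources/
│   └── {{.project_name}}_job.yml
├── scratch/                   # README.md plus an exploratory notebook
├── setup.py
├── src/
│   ├── notebook.ipynb
│   └── {{.project_name}}/     # __init__.py, main.py
└── tests/
```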
This commit is contained in:
parent 947d5b1e5c
commit 8c2cc07f7b
In `newInitCommand` (the `bundle init` command), the TODO comment is updated:

```diff
@@ -59,7 +59,7 @@ func newInitCommand() *cobra.Command {
 	} else {
 		return errors.New("please specify a template")
 
-		/* TODO: propose to use default-python (once template is ready)
+		/* TODO: propose to use default-python (once #708 is merged)
 		var err error
 		if !cmdio.IsOutTTY(ctx) || !cmdio.IsInTTY(ctx) {
 			return errors.New("please specify a template")
```
In the template schema, the description of `project_name` is updated:

```diff
@@ -3,7 +3,7 @@
     "project_name": {
       "type": "string",
       "default": "my_project",
-      "description": "Name of the directory"
+      "description": "Unique name for this project"
     }
   }
 }
```
libs/template/templates/default-python/template/{{.project_name}}/.gitignore (new file):

```
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
scratch/**
!scratch/README.md
```
libs/template/templates/default-python/template/{{.project_name}}/.vscode/__builtins__.pyi (vendored, new file):

```python
# Typings for Pylance in Visual Studio Code
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
from databricks.sdk.runtime import *
```
libs/template/templates/default-python/template/{{.project_name}}/.vscode/extensions.json (vendored, new file):

```json
{
    "recommendations": [
        "databricks.databricks",
        "ms-python.vscode-pylance",
        "redhat.vscode-yaml"
    ]
}
```
libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json (vendored, new file):

```json
{
    "python.analysis.stubPath": ".vscode",
    "databricks.python.envFile": "${workspaceFolder}/.env",
    "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
    "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
    "python.testing.pytestArgs": [
        "."
    ],
    "python.testing.unittestEnabled": false,
    "python.testing.pytestEnabled": true,
    "files.exclude": {
        "**/*.egg-info": true
    },
}
```
The template's old README is removed:

```diff
@@ -1,3 +0,0 @@
-# {{.project_name}}
-
-The '{{.project_name}}' bundle was generated using the default-python template.
```
{{.project_name}}/README.md (new file):

````markdown
# {{.project_name}}

The '{{.project_name}}' project was generated by the default-python template.

## Getting started

1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html

2. Authenticate to your Databricks workspace:
   ```
   $ databricks configure
   ```

3. To deploy a development copy of this project, type:
   ```
   $ databricks bundle deploy --target dev
   ```
   (Note that "dev" is the default target, so the `--target` parameter
   is optional here.)

   This deploys everything that's defined for this project.
   For example, the default template would deploy a job called
   `[dev yourname] {{.project_name}}-job` to your workspace.
   You can find that job by opening your workspace and clicking on **Workflows**.

4. Similarly, to deploy a production copy, type:
   ```
   $ databricks bundle deploy --target prod
   ```

5. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
   https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for
   **Databricks Connect** for instructions on running the included Python code from a different IDE.

6. For documentation on the Databricks asset bundles format used
   for this project, and for CI/CD configuration, see
   https://docs.databricks.com/dev-tools/bundles/index.html.
````
{{.project_name}}/databricks.yml (new file):

```yaml
# This is a Databricks asset bundle definition for {{.project_name}}.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: {{.project_name}}

include:
  - resources/*.yml

targets:
  # The 'dev' target, used for development purposes.
  # Whenever a developer deploys using 'dev', they get their own copy.
  dev:
    # We use 'mode: development' to make sure everything deployed to this target gets a prefix
    # like '[dev my_user_name]'. Setting this mode also disables any schedules and
    # automatic triggers for jobs and enables the 'development' mode for Delta Live Tables pipelines.
    mode: development
    default: true
    workspace:
      host: {{workspace_host}}

  # Optionally, there could be a 'staging' target here.
  # (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/index.html.)
  #
  # staging:
  #   workspace:
  #     host: {{workspace_host}}

  # The 'prod' target, used for production deployment.
  prod:
    # For production deployments, we only have a single copy, so we override the
    # workspace.root_path default of
    # /Users/${workspace.current_user.userName}/.bundle/${bundle.target}/${bundle.name}
    # to a path that is not specific to the current user.
    {{- /*
    Explaining 'mode: production' isn't as pressing as explaining 'mode: development'.
    As we already talked about the other mode above, users can just
    look at documentation or ask the assistant about 'mode: production'.
    #
    # By making use of 'mode: production' we enable strict checks
    # to make sure we have correctly configured this target.
    */}}
    mode: production
    workspace:
      host: {{workspace_host}}
      root_path: /Shared/.bundle/prod/${bundle.name}
    {{- if not is_service_principal}}
    run_as:
      # This runs as {{user_name}} in production. Alternatively,
      # a service principal could be used here using service_principal_name
      # (see Databricks documentation).
      user_name: {{user_name}}
    {{end -}}
```
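As a quick illustration of how these targets are used (the deploy commands come from the README above; `bundle validate` is shown as an assumed extra step):

```
$ databricks bundle validate              # check the bundle configuration
$ databricks bundle deploy --target dev   # per-developer copy with a '[dev ...]' prefix
$ databricks bundle deploy --target prod  # single production copy under /Shared
```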
{{.project_name}}/fixtures/README.md (new file):

````markdown
# Fixtures
{{- /*
We don't want to have too many README.md files, since they
stand out so much. But we do need to have a file here to make
sure the folder is added to Git.
*/}}

This folder is reserved for fixtures, such as CSV files.

Below is an example of how to load fixtures as a data frame:

```
import pandas as pd
import os

def get_absolute_path(*relative_parts):
    if 'dbutils' in globals():
        base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get())  # type: ignore
        path = os.path.normpath(os.path.join(base_dir, *relative_parts))
        return path if path.startswith("/Workspace") else os.path.join("/Workspace", path)
    else:
        return os.path.join(*relative_parts)

csv_file = get_absolute_path("..", "fixtures", "mycsv.csv")
df = pd.read_csv(csv_file)
display(df)
```
````
{{.project_name}}/pytest.ini (new file):

```ini
[pytest]
testpaths = tests
pythonpath = src
```
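With `testpaths = tests` and `pythonpath = src`, pytest discovers the test below and can import the `{{.project_name}}` package without installing it. A minimal local run, assuming pytest is installed and a Spark session is available (for example via Databricks Connect):

```
$ pytest
```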
{{.project_name}}/resources/{{.project_name}}_job.yml (new file):

```yaml
# The main job for {{.project_name}}
resources:
  jobs:
    {{.project_name}}_job:
      name: {{.project_name}}_job

      schedule:
        quartz_cron_expression: '44 37 8 * * ?'
        timezone_id: Europe/Amsterdam

      {{- if not is_service_principal}}
      email_notifications:
        on_failure:
          - {{user_name}}
      {{end -}}

      tasks:
        - task_key: notebook_task
          job_cluster_key: job_cluster
          notebook_task:
            notebook_path: ../src/notebook.ipynb

        - task_key: python_wheel_task
          depends_on:
            - task_key: notebook_task
          job_cluster_key: job_cluster
          python_wheel_task:
            package_name: {{.project_name}}
            entry_point: main
          libraries:
            - whl: ../dist/*.whl

      job_clusters:
        - job_cluster_key: job_cluster
          new_cluster:
            {{- /* we should always use an LTS version in our templates */}}
            spark_version: 13.3.x-scala2.12
            node_type_id: {{smallest_node_type}}
            autoscale:
              min_workers: 1
              max_workers: 4
```
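Once deployed, the job can also be triggered from the CLI; a sketch, assuming a CLI version that includes the `bundle run` command:

```
$ databricks bundle run {{.project_name}}_job --target dev
```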
{{.project_name}}/scratch/README.md (new file):

```markdown
# scratch

This folder is reserved for personal, exploratory notebooks.
By default these are not committed to Git, as 'scratch' is listed in .gitignore.
```
An exploratory notebook under {{.project_name}}/scratch/ (new file):

```json
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('../src')\n",
    "from {{.project_name}} import main\n",
    "\n",
    "main.get_taxis().show(10)"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
   "notebookName": "ipynb-notebook",
   "widgets": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
```
{{.project_name}}/setup.py (new file):

```python
"""
Setup script for {{.project_name}}.

This script packages and distributes the associated wheel file(s).
Source code is in ./src/. Run 'python setup.py sdist bdist_wheel' to build.
"""
from setuptools import setup, find_packages

import sys
sys.path.append('./src')

import {{.project_name}}

setup(
    name="{{.project_name}}",
    version={{.project_name}}.__version__,
    url="https://databricks.com",
    author="{{.user_name}}",
    description="my test wheel",
    packages=find_packages(where='./src'),
    package_dir={'': 'src'},
    entry_points={"entry_points": "main={{.project_name}}.main:main"},
    install_requires=["setuptools"],
)
```
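Building the distribution as described in the docstring produces the wheel that the job's `python_wheel_task` picks up via `../dist/*.whl`:

```
$ python setup.py sdist bdist_wheel
$ ls dist/   # the *.whl referenced by the job definition
```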
{{.project_name}}/src/notebook.ipynb (new file):

```json
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "ee353e42-ff58-4955-9608-12865bd0950e",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "# Default notebook\n",
    "\n",
    "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}_job.yml."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "from {{.project_name}} import main\n",
    "\n",
    "main.get_taxis().show(10)\n"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
   "notebookName": "notebook",
   "widgets": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
```
{{.project_name}}/src/{{.project_name}}/__init__.py (new file):

```python
__version__ = "0.0.1"
```
{{.project_name}}/src/{{.project_name}}/main.py (new file):

```python
{{- /*
We use pyspark.sql rather than DatabricksSession.builder.getOrCreate()
for compatibility with older runtimes. With a new runtime, it's
equivalent to DatabricksSession.builder.getOrCreate().
*/ -}}
from pyspark.sql import SparkSession

def get_taxis():
    spark = SparkSession.builder.getOrCreate()
    return spark.read.table("samples.nyctaxi.trips")

def main():
    get_taxis().show(5)

if __name__ == '__main__':
    main()
```
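As the template comment above notes, on newer runtimes `SparkSession.builder.getOrCreate()` behaves like the Databricks Connect session builder. A minimal sketch of running the same query from a local IDE, assuming the databricks-connect package (v13+) is installed and authentication is configured:

```python
# A sketch, not part of the template: requires databricks-connect >= 13
# and a configured Databricks profile.
from databricks.connect import DatabricksSession

spark = DatabricksSession.builder.getOrCreate()
spark.read.table("samples.nyctaxi.trips").show(5)
```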
A test for `main` under {{.project_name}}/tests/ (new file):

```python
from {{.project_name}} import main

def test_main():
    taxis = main.get_taxis()
    assert taxis.count() > 5
```