mirror of https://github.com/databricks/cli.git
databricks bundle init template v1 (#686)
## Changes This adds a built-in "default-python" template to the CLI. This is based on the new default-template support of https://github.com/databricks/cli/pull/685. The goal here is to offer an experience where customers can simply type `databricks bundle init` to get a default template: ``` $ databricks bundle init Template to use [default-python]: default-python Unique name for this project [my_project]: my_project ✨ Successfully initialized template ``` The present template: - [x] Works well with VS Code - [x] Works well with the workspace - [x] Works well with DB Connect - [x] Uses minimal stubs rather than boiler-plate-heavy examples I'll have a followup with tests + DLT support. --------- Co-authored-by: Andrew Nester <andrew.nester@databricks.com> Co-authored-by: PaulCornellDB <paul.cornell@databricks.com> Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
This commit is contained in:
parent
947d5b1e5c
commit
8c2cc07f7b
|
@ -59,7 +59,7 @@ func newInitCommand() *cobra.Command {
|
|||
} else {
|
||||
return errors.New("please specify a template")
|
||||
|
||||
/* TODO: propose to use default-python (once template is ready)
|
||||
/* TODO: propose to use default-python (once #708 is merged)
|
||||
var err error
|
||||
if !cmdio.IsOutTTY(ctx) || !cmdio.IsInTTY(ctx) {
|
||||
return errors.New("please specify a template")
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
"project_name": {
|
||||
"type": "string",
|
||||
"default": "my_project",
|
||||
"description": "Name of the directory"
|
||||
"description": "Unique name for this project"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
|
||||
.databricks/
|
||||
build/
|
||||
dist/
|
||||
__pycache__/
|
||||
*.egg-info
|
||||
.venv/
|
||||
scratch/**
|
||||
!scratch/README.md
|
3
libs/template/templates/default-python/template/{{.project_name}}/.vscode/__builtins__.pyi
vendored
Normal file
3
libs/template/templates/default-python/template/{{.project_name}}/.vscode/__builtins__.pyi
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Typings for Pylance in Visual Studio Code
|
||||
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
|
||||
from databricks.sdk.runtime import *
|
7
libs/template/templates/default-python/template/{{.project_name}}/.vscode/extensions.json
vendored
Normal file
7
libs/template/templates/default-python/template/{{.project_name}}/.vscode/extensions.json
vendored
Normal file
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
"recommendations": [
|
||||
"databricks.databricks",
|
||||
"ms-python.vscode-pylance",
|
||||
"redhat.vscode-yaml"
|
||||
]
|
||||
}
|
14
libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json
vendored
Normal file
14
libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"python.analysis.stubPath": ".vscode",
|
||||
"databricks.python.envFile": "${workspaceFolder}/.env",
|
||||
"jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
|
||||
"jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
|
||||
"python.testing.pytestArgs": [
|
||||
"."
|
||||
],
|
||||
"python.testing.unittestEnabled": false,
|
||||
"python.testing.pytestEnabled": true,
|
||||
"files.exclude": {
|
||||
"**/*.egg-info": true
|
||||
},
|
||||
}
|
|
@ -1,3 +0,0 @@
|
|||
# {{.project_name}}
|
||||
|
||||
The '{{.project_name}}' bundle was generated using the default-python template.
|
|
@ -0,0 +1,37 @@
|
|||
# {{.project_name}}
|
||||
|
||||
The '{{.project_name}}' project was generated by using the default-python template.
|
||||
|
||||
## Getting started
|
||||
|
||||
1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html
|
||||
|
||||
2. Authenticate to your Databricks workspace:
|
||||
```
|
||||
$ databricks configure
|
||||
```
|
||||
|
||||
3. To deploy a development copy of this project, type:
|
||||
```
|
||||
$ databricks bundle deploy --target dev
|
||||
```
|
||||
(Note that "dev" is the default target, so the `--target` parameter
|
||||
is optional here.)
|
||||
|
||||
This deploys everything that's defined for this project.
|
||||
For example, the default template would deploy a job called
|
||||
`[dev yourname] {{.project_name}}_job` to your workspace.
|
||||
You can find that job by opening your workspace and clicking on **Workflows**.
|
||||
|
||||
4. Similarly, to deploy a production copy, type:
|
||||
```
|
||||
$ databricks bundle deploy --target prod
|
||||
```
|
||||
|
||||
5. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
|
||||
https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for
|
||||
**Databricks Connect** for instructions on running the included Python code from a different IDE.
|
||||
|
||||
6. For documentation on the Databricks asset bundles format used
|
||||
for this project, and for CI/CD configuration, see
|
||||
https://docs.databricks.com/dev-tools/bundles/index.html.
|
|
@ -0,0 +1,52 @@
|
|||
# This is a Databricks asset bundle definition for {{.project_name}}.
|
||||
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
|
||||
bundle:
|
||||
name: {{.project_name}}
|
||||
|
||||
include:
|
||||
- resources/*.yml
|
||||
|
||||
targets:
|
||||
# The 'dev' target, used for development purposes.
|
||||
# Whenever a developer deploys using 'dev', they get their own copy.
|
||||
dev:
|
||||
# We use 'mode: development' to make sure everything deployed to this target gets a prefix
|
||||
# like '[dev my_user_name]'. Setting this mode also disables any schedules and
|
||||
# automatic triggers for jobs and enables the 'development' mode for Delta Live Tables pipelines.
|
||||
mode: development
|
||||
default: true
|
||||
workspace:
|
||||
host: {{workspace_host}}
|
||||
|
||||
# Optionally, there could be a 'staging' target here.
|
||||
# (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/index.html.)
|
||||
#
|
||||
# staging:
|
||||
# workspace:
|
||||
# host: {{workspace_host}}
|
||||
|
||||
# The 'prod' target, used for production deployment.
|
||||
prod:
|
||||
# For production deployments, we only have a single copy, so we override the
|
||||
# workspace.root_path default of
|
||||
# /Users/${workspace.current_user.userName}/.bundle/${bundle.target}/${bundle.name}
|
||||
# to a path that is not specific to the current user.
|
||||
{{- /*
|
||||
Explaining 'mode: production' isn't as pressing as explaining 'mode: development'.
|
||||
As we already talked about the other mode above, users can just
|
||||
look at documentation or ask the assistant about 'mode: production'.
|
||||
#
|
||||
# By making use of 'mode: production' we enable strict checks
|
||||
# to make sure we have correctly configured this target.
|
||||
*/}}
|
||||
mode: production
|
||||
workspace:
|
||||
host: {{workspace_host}}
|
||||
root_path: /Shared/.bundle/prod/${bundle.name}
|
||||
{{- if not is_service_principal}}
|
||||
run_as:
|
||||
# This runs as {{user_name}} in production. Alternatively,
|
||||
# a service principal could be used here using service_principal_name
|
||||
# (see Databricks documentation).
|
||||
user_name: {{user_name}}
|
||||
{{end -}}
|
|
@ -0,0 +1,27 @@
|
|||
# Fixtures
|
||||
{{- /*
|
||||
We don't want to have too many README.md files, since they
|
||||
stand out so much. But we do need to have a file here to make
|
||||
sure the folder is added to Git.
|
||||
*/}}
|
||||
|
||||
This folder is reserved for fixtures, such as CSV files.
|
||||
|
||||
Below is an example of how to load fixtures as a data frame:
|
||||
|
||||
```
|
||||
import pandas as pd
|
||||
import os
|
||||
|
||||
def get_absolute_path(*relative_parts):
|
||||
if 'dbutils' in globals():
|
||||
base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore
|
||||
path = os.path.normpath(os.path.join(base_dir, *relative_parts))
|
||||
return path if path.startswith("/Workspace") else os.path.join("/Workspace", path)
|
||||
else:
|
||||
return os.path.join(*relative_parts)
|
||||
|
||||
csv_file = get_absolute_path("..", "fixtures", "mycsv.csv")
|
||||
df = pd.read_csv(csv_file)
|
||||
display(df)
|
||||
```
|
|
@ -0,0 +1,3 @@
|
|||
[pytest]
|
||||
testpaths = tests
|
||||
pythonpath = src
|
|
@ -0,0 +1,42 @@
|
|||
# The main job for {{.project_name}}
|
||||
resources:
|
||||
|
||||
jobs:
|
||||
{{.project_name}}_job:
|
||||
name: {{.project_name}}_job
|
||||
|
||||
schedule:
|
||||
quartz_cron_expression: '44 37 8 * * ?'
|
||||
timezone_id: Europe/Amsterdam
|
||||
|
||||
{{- if not is_service_principal}}
|
||||
email_notifications:
|
||||
on_failure:
|
||||
- {{user_name}}
|
||||
{{end -}}
|
||||
|
||||
tasks:
|
||||
- task_key: notebook_task
|
||||
job_cluster_key: job_cluster
|
||||
notebook_task:
|
||||
notebook_path: ../src/notebook.ipynb
|
||||
|
||||
- task_key: python_wheel_task
|
||||
depends_on:
|
||||
- task_key: notebook_task
|
||||
job_cluster_key: job_cluster
|
||||
python_wheel_task:
|
||||
package_name: {{.project_name}}
|
||||
entry_point: main
|
||||
libraries:
|
||||
- whl: ../dist/*.whl
|
||||
|
||||
job_clusters:
|
||||
- job_cluster_key: job_cluster
|
||||
new_cluster:
|
||||
{{- /* we should always use an LTS version in our templates */}}
|
||||
spark_version: 13.3.x-scala2.12
|
||||
node_type_id: {{smallest_node_type}}
|
||||
autoscale:
|
||||
min_workers: 1
|
||||
max_workers: 4
|
|
@ -0,0 +1,4 @@
|
|||
# scratch
|
||||
|
||||
This folder is reserved for personal, exploratory notebooks.
|
||||
By default these are not committed to Git, as 'scratch' is listed in .gitignore.
|
|
@ -0,0 +1,50 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"application/vnd.databricks.v1+cell": {
|
||||
"cellMetadata": {
|
||||
"byteLimit": 2048000,
|
||||
"rowLimit": 10000
|
||||
},
|
||||
"inputWidgets": {},
|
||||
"nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae",
|
||||
"showTitle": false,
|
||||
"title": ""
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import sys\n",
|
||||
"sys.path.append('../src')\n",
|
||||
"from {{.project_name}} import main\n",
|
||||
"\n",
|
||||
"main.get_taxis().show(10)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"application/vnd.databricks.v1+notebook": {
|
||||
"dashboards": [],
|
||||
"language": "python",
|
||||
"notebookMetadata": {
|
||||
"pythonIndentUnit": 2
|
||||
},
|
||||
"notebookName": "ipynb-notebook",
|
||||
"widgets": {}
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
"""
|
||||
Setup script for {{.project_name}}.
|
||||
|
||||
This script packages and distributes the associated wheel file(s).
|
||||
Source code is in ./src/. Run 'python setup.py sdist bdist_wheel' to build.
|
||||
"""
|
||||
from setuptools import setup, find_packages
|
||||
|
||||
import sys
|
||||
sys.path.append('./src')
|
||||
|
||||
import {{.project_name}}
|
||||
|
||||
setup(
|
||||
name="{{.project_name}}",
|
||||
version={{.project_name}}.__version__,
|
||||
url="https://databricks.com",
|
||||
author="{{.user_name}}",
|
||||
description="my test wheel",
|
||||
packages=find_packages(where='./src'),
|
||||
package_dir={'': 'src'},
|
||||
entry_points={"entry_points": "main={{.project_name}}.main:main"},
|
||||
install_requires=["setuptools"],
|
||||
)
|
|
@ -0,0 +1,65 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"application/vnd.databricks.v1+cell": {
|
||||
"cellMetadata": {},
|
||||
"inputWidgets": {},
|
||||
"nuid": "ee353e42-ff58-4955-9608-12865bd0950e",
|
||||
"showTitle": false,
|
||||
"title": ""
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"# Default notebook\n",
|
||||
"\n",
|
||||
"This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}_job.yml."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 0,
|
||||
"metadata": {
|
||||
"application/vnd.databricks.v1+cell": {
|
||||
"cellMetadata": {
|
||||
"byteLimit": 2048000,
|
||||
"rowLimit": 10000
|
||||
},
|
||||
"inputWidgets": {},
|
||||
"nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae",
|
||||
"showTitle": false,
|
||||
"title": ""
|
||||
}
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from {{.project_name}} import main\n",
|
||||
"\n",
|
||||
"main.get_taxis().show(10)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"application/vnd.databricks.v1+notebook": {
|
||||
"dashboards": [],
|
||||
"language": "python",
|
||||
"notebookMetadata": {
|
||||
"pythonIndentUnit": 2
|
||||
},
|
||||
"notebookName": "notebook",
|
||||
"widgets": {}
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 0
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
__version__ = "0.0.1"
|
|
@ -0,0 +1,16 @@
|
|||
{{- /*
|
||||
We use pyspark.sql rather than DatabricksSession.builder.getOrCreate()
|
||||
for compatibility with older runtimes. With a new runtime, it's
|
||||
equivalent to DatabricksSession.builder.getOrCreate().
|
||||
*/ -}}
|
||||
from pyspark.sql import SparkSession
|
||||
|
||||
def get_taxis():
    """Read the ``samples.nyctaxi.trips`` table via a Spark session."""
    session = SparkSession.builder.getOrCreate()
    trips = session.read.table("samples.nyctaxi.trips")
    return trips
|
||||
|
||||
def main():
    """Load the taxi trips and call ``show(5)`` on the result."""
    taxis = get_taxis()
    taxis.show(5)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -0,0 +1,5 @@
|
|||
from {{.project_name}} import main
|
||||
|
||||
def test_main():
    """Smoke-test that the sample taxi table can be loaded and is non-trivial."""
    taxis = main.get_taxis()
    # get_taxis() returns the table unfiltered (main() merely calls show(5)),
    # so check a lower bound instead of an exact row count of 5.
    assert taxis.count() > 5
|
Loading…
Reference in New Issue