mirror of https://github.com/databricks/cli.git
# databricks bundle init template v1 (#686)
## Changes

This adds a built-in "default-python" template to the CLI. This is based on the new default-template support of https://github.com/databricks/cli/pull/685.

The goal here is to offer an experience where customers can simply type `databricks bundle init` to get a default template:

```
$ databricks bundle init
Template to use [default-python]: default-python
Unique name for this project [my_project]: my_project
✨ Successfully initialized template
```

The present template:

- [x] Works well with VS Code
- [x] Works well with the workspace
- [x] Works well with DB Connect
- [x] Uses minimal stubs rather than boilerplate-heavy examples

I'll have a followup with tests + DLT support.

---------

Co-authored-by: Andrew Nester <andrew.nester@databricks.com>
Co-authored-by: PaulCornellDB <paul.cornell@databricks.com>
Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
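For orientation, the files added in this diff lay out the generated project roughly as follows (paths that don't appear verbatim in the diff are inferred from cross-references such as `notebook_path: ../src/notebook.ipynb` and `testpaths = tests`):

```
{{.project_name}}/
├── .gitignore
├── .vscode/                   # __builtins__.pyi, extensions.json, settings.json
├── README.md
├── databricks.yml
├── fixtures/                  # README.md
├── pytest.ini
├── resources/
│   └── {{.project_name}}_job.yml
├── scratch/                   # README.md plus an exploratory notebook
├── setup.py
├── src/
│   ├── notebook.ipynb
│   └── {{.project_name}}/     # __init__.py, main.py
└── tests/
```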
This commit is contained in:
parent 947d5b1e5c
commit 8c2cc07f7b
In `newInitCommand` (the `bundle init` command), the TODO comment is updated:

```diff
@@ -59,7 +59,7 @@ func newInitCommand() *cobra.Command {
 	} else {
 		return errors.New("please specify a template")
 
-		/* TODO: propose to use default-python (once template is ready)
+		/* TODO: propose to use default-python (once #708 is merged)
 		var err error
 		if !cmdio.IsOutTTY(ctx) || !cmdio.IsInTTY(ctx) {
 			return errors.New("please specify a template")
```
In the template schema, the description of `project_name` is updated:

```diff
@@ -3,7 +3,7 @@
     "project_name": {
       "type": "string",
       "default": "my_project",
-      "description": "Name of the directory"
+      "description": "Unique name for this project"
     }
   }
 }
```
libs/template/templates/default-python/template/{{.project_name}}/.gitignore (new file):

```
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
scratch/**
!scratch/README.md
```
libs/template/templates/default-python/template/{{.project_name}}/.vscode/__builtins__.pyi (vendored, new file):

```python
# Typings for Pylance in Visual Studio Code
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
from databricks.sdk.runtime import *
```
libs/template/templates/default-python/template/{{.project_name}}/.vscode/extensions.json (vendored, new file):

```json
{
    "recommendations": [
        "databricks.databricks",
        "ms-python.vscode-pylance",
        "redhat.vscode-yaml"
    ]
}
```
libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json (vendored, new file):

```json
{
    "python.analysis.stubPath": ".vscode",
    "databricks.python.envFile": "${workspaceFolder}/.env",
    "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
    "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
    "python.testing.pytestArgs": [
        "."
    ],
    "python.testing.unittestEnabled": false,
    "python.testing.pytestEnabled": true,
    "files.exclude": {
        "**/*.egg-info": true
    },
}
```
The template's old README is removed:

```diff
@@ -1,3 +0,0 @@
-# {{.project_name}}
-
-The '{{.project_name}}' bundle was generated using the default-python template.
```
{{.project_name}}/README.md (new file):

````markdown
# {{.project_name}}

The '{{.project_name}}' project was generated by the default-python template.

## Getting started

1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html

2. Authenticate to your Databricks workspace:
   ```
   $ databricks configure
   ```

3. To deploy a development copy of this project, type:
   ```
   $ databricks bundle deploy --target dev
   ```
   (Note that "dev" is the default target, so the `--target` parameter
   is optional here.)

   This deploys everything that's defined for this project.
   For example, the default template would deploy a job called
   `[dev yourname] {{.project_name}}-job` to your workspace.
   You can find that job by opening your workspace and clicking on **Workflows**.

4. Similarly, to deploy a production copy, type:
   ```
   $ databricks bundle deploy --target prod
   ```

5. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
   https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for
   **Databricks Connect** for instructions on running the included Python code from a different IDE.

6. For documentation on the Databricks asset bundles format used
   for this project, and for CI/CD configuration, see
   https://docs.databricks.com/dev-tools/bundles/index.html.
````
{{.project_name}}/databricks.yml (new file):

```yaml
# This is a Databricks asset bundle definition for {{.project_name}}.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: {{.project_name}}

include:
  - resources/*.yml

targets:
  # The 'dev' target, used for development purposes.
  # Whenever a developer deploys using 'dev', they get their own copy.
  dev:
    # We use 'mode: development' to make sure everything deployed to this target gets a prefix
    # like '[dev my_user_name]'. Setting this mode also disables any schedules and
    # automatic triggers for jobs and enables the 'development' mode for Delta Live Tables pipelines.
    mode: development
    default: true
    workspace:
      host: {{workspace_host}}

  # Optionally, there could be a 'staging' target here.
  # (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/index.html.)
  #
  # staging:
  #   workspace:
  #     host: {{workspace_host}}

  # The 'prod' target, used for production deployment.
  prod:
    # For production deployments, we only have a single copy, so we override the
    # workspace.root_path default of
    # /Users/${workspace.current_user.userName}/.bundle/${bundle.target}/${bundle.name}
    # to a path that is not specific to the current user.
    {{- /*
    Explaining 'mode: production' isn't as pressing as explaining 'mode: development'.
    As we already talked about the other mode above, users can just
    look at documentation or ask the assistant about 'mode: production'.
    #
    # By making use of 'mode: production' we enable strict checks
    # to make sure we have correctly configured this target.
    */}}
    mode: production
    workspace:
      host: {{workspace_host}}
      root_path: /Shared/.bundle/prod/${bundle.name}
    {{- if not is_service_principal}}
    run_as:
      # This runs as {{user_name}} in production. Alternatively,
      # a service principal could be used here using service_principal_name
      # (see Databricks documentation).
      user_name: {{user_name}}
    {{end -}}
```
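As a quick illustration of how these targets are used (the deploy commands come from the README above; `bundle validate` is shown as an assumed extra step):

```
$ databricks bundle validate              # check the bundle configuration
$ databricks bundle deploy --target dev   # per-developer copy with a '[dev ...]' prefix
$ databricks bundle deploy --target prod  # single production copy under /Shared
```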
{{.project_name}}/fixtures/README.md (new file):

````markdown
# Fixtures
{{- /*
We don't want to have too many README.md files, since they
stand out so much. But we do need to have a file here to make
sure the folder is added to Git.
*/}}

This folder is reserved for fixtures, such as CSV files.

Below is an example of how to load fixtures as a data frame:

```
import pandas as pd
import os

def get_absolute_path(*relative_parts):
    if 'dbutils' in globals():
        base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get())  # type: ignore
        path = os.path.normpath(os.path.join(base_dir, *relative_parts))
        return path if path.startswith("/Workspace") else os.path.join("/Workspace", path)
    else:
        return os.path.join(*relative_parts)

csv_file = get_absolute_path("..", "fixtures", "mycsv.csv")
df = pd.read_csv(csv_file)
display(df)
```
````
{{.project_name}}/pytest.ini (new file):

```ini
[pytest]
testpaths = tests
pythonpath = src
```
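With `testpaths = tests` and `pythonpath = src`, pytest discovers the test below and can import the `{{.project_name}}` package without installing it. A minimal local run, assuming pytest is installed and a Spark session is available (for example via Databricks Connect):

```
$ pytest
```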
{{.project_name}}/resources/{{.project_name}}_job.yml (new file):

```yaml
# The main job for {{.project_name}}
resources:
  jobs:
    {{.project_name}}_job:
      name: {{.project_name}}_job

      schedule:
        quartz_cron_expression: '44 37 8 * * ?'
        timezone_id: Europe/Amsterdam

      {{- if not is_service_principal}}
      email_notifications:
        on_failure:
          - {{user_name}}
      {{end -}}

      tasks:
        - task_key: notebook_task
          job_cluster_key: job_cluster
          notebook_task:
            notebook_path: ../src/notebook.ipynb

        - task_key: python_wheel_task
          depends_on:
            - task_key: notebook_task
          job_cluster_key: job_cluster
          python_wheel_task:
            package_name: {{.project_name}}
            entry_point: main
          libraries:
            - whl: ../dist/*.whl

      job_clusters:
        - job_cluster_key: job_cluster
          new_cluster:
            {{- /* we should always use an LTS version in our templates */}}
            spark_version: 13.3.x-scala2.12
            node_type_id: {{smallest_node_type}}
            autoscale:
              min_workers: 1
              max_workers: 4
```
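Once deployed, the job can also be triggered from the CLI; a sketch, assuming a CLI version that includes the `bundle run` command:

```
$ databricks bundle run {{.project_name}}_job --target dev
```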
{{.project_name}}/scratch/README.md (new file):

```markdown
# scratch

This folder is reserved for personal, exploratory notebooks.
By default these are not committed to Git, as 'scratch' is listed in .gitignore.
```
An exploratory notebook under {{.project_name}}/scratch/ (new file):

```json
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append('../src')\n",
    "from {{.project_name}} import main\n",
    "\n",
    "main.get_taxis().show(10)"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
   "notebookName": "ipynb-notebook",
   "widgets": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
```
{{.project_name}}/setup.py (new file):

```python
"""
Setup script for {{.project_name}}.

This script packages and distributes the associated wheel file(s).
Source code is in ./src/. Run 'python setup.py sdist bdist_wheel' to build.
"""
from setuptools import setup, find_packages

import sys
sys.path.append('./src')

import {{.project_name}}

setup(
    name="{{.project_name}}",
    version={{.project_name}}.__version__,
    url="https://databricks.com",
    author="{{.user_name}}",
    description="my test wheel",
    packages=find_packages(where='./src'),
    package_dir={'': 'src'},
    entry_points={"entry_points": "main={{.project_name}}.main:main"},
    install_requires=["setuptools"],
)
```
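Building the distribution as described in the docstring produces the wheel that the job's `python_wheel_task` picks up via `../dist/*.whl`:

```
$ python setup.py sdist bdist_wheel
$ ls dist/   # the *.whl referenced by the job definition
```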
{{.project_name}}/src/notebook.ipynb (new file):

```json
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "ee353e42-ff58-4955-9608-12865bd0950e",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "# Default notebook\n",
    "\n",
    "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}_job.yml."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "from {{.project_name}} import main\n",
    "\n",
    "main.get_taxis().show(10)\n"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
   "notebookName": "notebook",
   "widgets": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
```
{{.project_name}}/src/{{.project_name}}/__init__.py (new file):

```python
__version__ = "0.0.1"
```
{{.project_name}}/src/{{.project_name}}/main.py (new file):

```python
{{- /*
We use pyspark.sql rather than DatabricksSession.builder.getOrCreate()
for compatibility with older runtimes. With a new runtime, it's
equivalent to DatabricksSession.builder.getOrCreate().
*/ -}}
from pyspark.sql import SparkSession

def get_taxis():
    spark = SparkSession.builder.getOrCreate()
    return spark.read.table("samples.nyctaxi.trips")

def main():
    get_taxis().show(5)

if __name__ == '__main__':
    main()
```
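As the template comment above notes, on newer runtimes `SparkSession.builder.getOrCreate()` behaves like the Databricks Connect session builder. A minimal sketch of running the same query from a local IDE, assuming the databricks-connect package (v13+) is installed and authentication is configured:

```python
# A sketch, not part of the template: requires databricks-connect >= 13
# and a configured Databricks profile.
from databricks.connect import DatabricksSession

spark = DatabricksSession.builder.getOrCreate()
spark.read.table("samples.nyctaxi.trips").show(5)
```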
A test for `main` under {{.project_name}}/tests/ (new file):

```python
from {{.project_name}} import main

def test_main():
    taxis = main.get_taxis()
    assert taxis.count() > 5
```