diff --git a/cmd/bundle/init.go b/cmd/bundle/init.go index 2127a7bc..bf68e921 100644 --- a/cmd/bundle/init.go +++ b/cmd/bundle/init.go @@ -59,7 +59,7 @@ func newInitCommand() *cobra.Command { } else { return errors.New("please specify a template") - /* TODO: propose to use default-python (once template is ready) + /* TODO: propose to use default-python (once #708 is merged) var err error if !cmdio.IsOutTTY(ctx) || !cmdio.IsInTTY(ctx) { return errors.New("please specify a template") diff --git a/libs/template/templates/default-python/databricks_template_schema.json b/libs/template/templates/default-python/databricks_template_schema.json index b680c5fb..3220e9a6 100644 --- a/libs/template/templates/default-python/databricks_template_schema.json +++ b/libs/template/templates/default-python/databricks_template_schema.json @@ -3,7 +3,7 @@ "project_name": { "type": "string", "default": "my_project", - "description": "Name of the directory" + "description": "Unique name for this project" } } } diff --git a/libs/template/templates/default-python/template/{{.project_name}}/.gitignore b/libs/template/templates/default-python/template/{{.project_name}}/.gitignore new file mode 100644 index 00000000..aa87f019 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/.gitignore @@ -0,0 +1,9 @@ + +.databricks/ +build/ +dist/ +__pycache__/ +*.egg-info +.venv/ +scratch/** +!scratch/README.md diff --git a/libs/template/templates/default-python/template/{{.project_name}}/.vscode/__builtins__.pyi b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/__builtins__.pyi new file mode 100644 index 00000000..0edd5181 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/__builtins__.pyi @@ -0,0 +1,3 @@ +# Typings for Pylance in Visual Studio Code +# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md +from databricks.sdk.runtime import * diff --git 
a/libs/template/templates/default-python/template/{{.project_name}}/.vscode/extensions.json b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/extensions.json new file mode 100644 index 00000000..5d15eba3 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/extensions.json @@ -0,0 +1,7 @@ +{ + "recommendations": [ + "databricks.databricks", + "ms-python.vscode-pylance", + "redhat.vscode-yaml" + ] +} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json new file mode 100644 index 00000000..16cb2c96 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json @@ -0,0 +1,14 @@ +{ + "python.analysis.stubPath": ".vscode", + "databricks.python.envFile": "${workspaceFolder}/.env", + "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])", + "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------", + "python.testing.pytestArgs": [ + "." + ], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true, + "files.exclude": { + "**/*.egg-info": true + }, +} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md b/libs/template/templates/default-python/template/{{.project_name}}/README.md deleted file mode 100644 index 3187b9ed..00000000 --- a/libs/template/templates/default-python/template/{{.project_name}}/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# {{.project_name}} - -The '{{.project_name}}' bundle was generated using the default-python template. 
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl new file mode 100644 index 00000000..4c89435b --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl @@ -0,0 +1,37 @@ +# {{.project_name}} + +The '{{.project_name}}' project was generated by using the default-python template. + +## Getting started + +1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html + +2. Authenticate to your Databricks workspace: + ``` + $ databricks configure + ``` + +3. To deploy a development copy of this project, type: + ``` + $ databricks bundle deploy --target dev + ``` + (Note that "dev" is the default target, so the `--target` parameter + is optional here.) + + This deploys everything that's defined for this project. + For example, the default template would deploy a job called + `[dev yourname] {{.project_name}}_job` to your workspace. + You can find that job by opening your workspace and clicking on **Workflows**. + +4. Similarly, to deploy a production copy, type: + ``` + $ databricks bundle deploy --target prod + ``` + +5. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from + https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for + **Databricks Connect** for instructions on running the included Python code from a different IDE. + +6. For documentation on the Databricks asset bundles format used + for this project, and for CI/CD configuration, see + https://docs.databricks.com/dev-tools/bundles/index.html. 
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl new file mode 100644 index 00000000..48aef0ea --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/databricks.yml.tmpl @@ -0,0 +1,52 @@ +# This is a Databricks asset bundle definition for {{.project_name}}. +# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation. +bundle: + name: {{.project_name}} + +include: + - resources/*.yml + +targets: + # The 'dev' target, used for development purposes. + # Whenever a developer deploys using 'dev', they get their own copy. + dev: + # We use 'mode: development' to make sure everything deployed to this target gets a prefix + # like '[dev my_user_name]'. Setting this mode also disables any schedules and + # automatic triggers for jobs and enables the 'development' mode for Delta Live Tables pipelines. + mode: development + default: true + workspace: + host: {{workspace_host}} + + # Optionally, there could be a 'staging' target here. + # (See Databricks docs on CI/CD at https://docs.databricks.com/dev-tools/bundles/index.html.) + # + # staging: + # workspace: + # host: {{workspace_host}} + + # The 'prod' target, used for production deployment. + prod: + # For production deployments, we only have a single copy, so we override the + # workspace.root_path default of + # /Users/${workspace.current_user.userName}/.bundle/${bundle.target}/${bundle.name} + # to a path that is not specific to the current user. + {{- /* + Explaining 'mode: production' isn't as pressing as explaining 'mode: development'. + As we already talked about the other mode above, users can just + look at documentation or ask the assistant about 'mode: production'. + # + # By making use of 'mode: production' we enable strict checks + # to make sure we have correctly configured this target. 
+ */}} + mode: production + workspace: + host: {{workspace_host}} + root_path: /Shared/.bundle/prod/${bundle.name} + {{- if not is_service_principal}} + run_as: + # This runs as {{user_name}} in production. Alternatively, + # a service principal could be used here using service_principal_name + # (see Databricks documentation). + user_name: {{user_name}} + {{end -}} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/fixtures/.gitkeep.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/fixtures/.gitkeep.tmpl new file mode 100644 index 00000000..361c681f --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/fixtures/.gitkeep.tmpl @@ -0,0 +1,27 @@ +# Fixtures +{{- /* +We don't want to have too many README.md files, since they +stand out so much. But we do need to have a file here to make +sure the folder is added to Git. +*/}} + +This folder is reserved for fixtures, such as CSV files. + +Below is an example of how to load fixtures as a data frame: + +``` +import pandas as pd +import os + +def get_absolute_path(*relative_parts): + if 'dbutils' in globals(): + base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore + path = os.path.normpath(os.path.join(base_dir, *relative_parts)) + return path if path.startswith("/Workspace") else os.path.join("/Workspace", path) + else: + return os.path.join(*relative_parts) + +csv_file = get_absolute_path("..", "fixtures", "mycsv.csv") +df = pd.read_csv(csv_file) +display(df) +``` diff --git a/libs/template/templates/default-python/template/{{.project_name}}/pytest.ini b/libs/template/templates/default-python/template/{{.project_name}}/pytest.ini new file mode 100644 index 00000000..80432c22 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +pythonpath = src diff --git 
a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl new file mode 100644 index 00000000..f8116cdf --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl @@ -0,0 +1,42 @@ +# The main job for {{.project_name}} +resources: + + jobs: + {{.project_name}}_job: + name: {{.project_name}}_job + + schedule: + quartz_cron_expression: '44 37 8 * * ?' + timezone_id: Europe/Amsterdam + + {{- if not is_service_principal}} + email_notifications: + on_failure: + - {{user_name}} + {{end -}} + + tasks: + - task_key: notebook_task + job_cluster_key: job_cluster + notebook_task: + notebook_path: ../src/notebook.ipynb + + - task_key: python_wheel_task + depends_on: + - task_key: notebook_task + job_cluster_key: job_cluster + python_wheel_task: + package_name: {{.project_name}} + entry_point: main + libraries: + - whl: ../dist/*.whl + + job_clusters: + - job_cluster_key: job_cluster + new_cluster: + {{- /* we should always use an LTS version in our templates */}} + spark_version: 13.3.x-scala2.12 + node_type_id: {{smallest_node_type}} + autoscale: + min_workers: 1 + max_workers: 4 diff --git a/libs/template/templates/default-python/template/{{.project_name}}/scratch/README.md b/libs/template/templates/default-python/template/{{.project_name}}/scratch/README.md new file mode 100644 index 00000000..e6cfb81b --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/scratch/README.md @@ -0,0 +1,4 @@ +# scratch + +This folder is reserved for personal, exploratory notebooks. +By default these are not committed to Git, as 'scratch' is listed in .gitignore. 
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl new file mode 100644 index 00000000..2ee36c3c --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl @@ -0,0 +1,50 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "import sys\n", + "sys.path.append('../src')\n", + "from {{.project_name}} import main\n", + "\n", + "main.get_taxis().show(10)" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "ipynb-notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl new file mode 100644 index 00000000..93f4e9ff --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl @@ -0,0 +1,24 @@ +""" +Setup script for {{.project_name}}. + +This script packages and distributes the associated wheel file(s). +Source code is in ./src/. Run 'python setup.py sdist bdist_wheel' to build. 
+""" +from setuptools import setup, find_packages + +import sys +sys.path.append('./src') + +import {{.project_name}} + +setup( + name="{{.project_name}}", + version={{.project_name}}.__version__, + url="https://databricks.com", + author="{{user_name}}", + description="my test wheel", + packages=find_packages(where='./src'), + package_dir={'': 'src'}, + entry_points={"entry_points": "main={{.project_name}}.main:main"}, + install_requires=["setuptools"], +) diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl new file mode 100644 index 00000000..26c74303 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -0,0 +1,65 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": {}, + "inputWidgets": {}, + "nuid": "ee353e42-ff58-4955-9608-12865bd0950e", + "showTitle": false, + "title": "" + } + }, + "source": [ + "# Default notebook\n", + "\n", + "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}_job.yml." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 0, + "metadata": { + "application/vnd.databricks.v1+cell": { + "cellMetadata": { + "byteLimit": 2048000, + "rowLimit": 10000 + }, + "inputWidgets": {}, + "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae", + "showTitle": false, + "title": "" + } + }, + "outputs": [], + "source": [ + "from {{.project_name}} import main\n", + "\n", + "main.get_taxis().show(10)\n" + ] + } + ], + "metadata": { + "application/vnd.databricks.v1+notebook": { + "dashboards": [], + "language": "python", + "notebookMetadata": { + "pythonIndentUnit": 2 + }, + "notebookName": "notebook", + "widgets": {} + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/__init__.py b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/__init__.py new file mode 100644 index 00000000..f102a9ca --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/__init__.py @@ -0,0 +1 @@ +__version__ = "0.0.1" diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl new file mode 100644 index 00000000..4fe5ac8f --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/{{.project_name}}/main.py.tmpl @@ -0,0 +1,16 @@ +{{- /* +We use pyspark.sql rather than DatabricksSession.builder.getOrCreate() +for compatibility with older runtimes. With a new runtime, it's +equivalent to DatabricksSession.builder.getOrCreate(). 
+*/ -}} +from pyspark.sql import SparkSession + +def get_taxis(): + spark = SparkSession.builder.getOrCreate() + return spark.read.table("samples.nyctaxi.trips") + +def main(): + get_taxis().show(5) + +if __name__ == '__main__': + main() diff --git a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl new file mode 100644 index 00000000..92afccc6 --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl @@ -0,0 +1,6 @@ +from {{.project_name}} import main + +def test_main(): + # get_taxis() reads the whole samples.nyctaxi.trips table (not a 5-row preview), so only assert a lower bound. + taxis = main.get_taxis() + assert taxis.count() > 5