mirror of https://github.com/databricks/cli.git
Add experimental-jobs-as-code template (#2177)
## Changes

Add an experimental-jobs-as-code template that allows defining jobs in Python instead of YAML, using the `databricks-bundles` PyPI package.

## Tests

Manually and with acceptance tests.
parent 7034793d1d
commit 31c10c1b82
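For orientation, below is a minimal sketch of what the new "jobs as code" style looks like with the `databricks-bundles` package, condensed from the template files in this diff; the job and task names are illustrative only, and the job is picked up by the `load_resources` function referenced from `databricks.yml` (see `resources/__init__.py` below).

```python
# Minimal sketch, condensed from the template files in this commit.
# "sample_job" and its single task are illustrative names, not part of the template.
from databricks.bundles.jobs import Job

sample_job = Job.from_dict(
    {
        "name": "sample_job",
        "tasks": [
            {
                "task_key": "notebook_task",
                "notebook_task": {"notebook_path": "src/notebook.ipynb"},
            },
        ],
    }
)
```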
@@ -8,6 +8,7 @@ import (
    "os"
    "os/exec"
    "path/filepath"
    "regexp"
    "runtime"
    "slices"
    "sort"

@@ -393,6 +394,16 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error {
}

func ListDir(t *testing.T, src string) ([]string, error) {
    // exclude folders in .gitignore from comparison
    ignored := []string{
        "\\.ruff_cache",
        "\\.venv",
        ".*\\.egg-info",
        "__pycache__",
        // depends on uv version
        "uv.lock",
    }

    var files []string
    err := filepath.Walk(src, func(path string, info os.FileInfo, err error) error {
        if err != nil {

@@ -400,7 +411,19 @@ func ListDir(t *testing.T, src string) ([]string, error) {
        }

        if info.IsDir() {
            for _, ignoredFolder := range ignored {
                if matched, _ := regexp.MatchString(ignoredFolder, info.Name()); matched {
                    return filepath.SkipDir
                }
            }

            return nil
        } else {
            for _, ignoredFolder := range ignored {
                if matched, _ := regexp.MatchString(ignoredFolder, info.Name()); matched {
                    return nil
                }
            }
        }

        relPath, err := filepath.Rel(src, path)
@@ -0,0 +1,5 @@
{
  "project_name": "my_jobs_as_code",
  "include_notebook": "yes",
  "include_python": "yes"
}
@@ -0,0 +1,85 @@

>>> $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output

Welcome to (EXPERIMENTAL) "Jobs as code" template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_jobs_as_code/databricks.yml'): $DATABRICKS_URL

✨ Your new project has been created in the 'my_jobs_as_code' directory!

Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.

>>> $CLI bundle validate -t dev --output json
{
  "jobs": {
    "my_jobs_as_code_job": {
      "deployment": {
        "kind": "BUNDLE",
        "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/state/metadata.json"
      },
      "edit_mode": "UI_LOCKED",
      "email_notifications": {
        "on_failure": [
          "$USERNAME"
        ]
      },
      "format": "MULTI_TASK",
      "job_clusters": [
        {
          "job_cluster_key": "job_cluster",
          "new_cluster": {
            "autoscale": {
              "max_workers": 4,
              "min_workers": 1
            },
            "node_type_id": "i3.xlarge",
            "spark_version": "15.4.x-scala2.12"
          }
        }
      ],
      "max_concurrent_runs": 4,
      "name": "[dev $USERNAME] my_jobs_as_code_job",
      "permissions": [],
      "queue": {
        "enabled": true
      },
      "tags": {
        "dev": "$USERNAME"
      },
      "tasks": [
        {
          "job_cluster_key": "job_cluster",
          "notebook_task": {
            "notebook_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/files/src/notebook"
          },
          "task_key": "notebook_task"
        },
        {
          "depends_on": [
            {
              "task_key": "notebook_task"
            }
          ],
          "job_cluster_key": "job_cluster",
          "libraries": [
            {
              "whl": "dist/*.whl"
            }
          ],
          "python_wheel_task": {
            "entry_point": "main",
            "package_name": "my_jobs_as_code"
          },
          "task_key": "main_task"
        }
      ],
      "trigger": {
        "pause_status": "PAUSED",
        "periodic": {
          "interval": 1,
          "unit": "DAYS"
        }
      }
    }
  }
}
acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore (vendored, new file, 8 lines)
@@ -0,0 +1,8 @@
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
scratch/**
!scratch/README.md
@@ -0,0 +1,58 @@
# my_jobs_as_code

The 'my_jobs_as_code' project was generated by using the "Jobs as code" template.

## Prerequisites

1. Install Databricks CLI 0.238 or later.
   See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html).

2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/).
   We use uv to create a virtual environment and install the required dependencies.

3. Authenticate to your Databricks workspace if you have not done so already:
   ```
   $ databricks configure
   ```

4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
   https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for
   **Databricks Connect** for instructions on running the included Python code from a different IDE.

5. For documentation on the Databricks Asset Bundles format used
   for this project, and for CI/CD configuration, see
   https://docs.databricks.com/dev-tools/bundles/index.html.

## Deploy and run jobs

1. Create a new virtual environment and install the required dependencies:
   ```
   $ uv sync
   ```

2. To deploy the bundle to the development target:
   ```
   $ databricks bundle deploy --target dev
   ```

   *(Note that "dev" is the default target, so the `--target` parameter is optional here.)*

   This deploys everything that's defined for this project.
   For example, the default template would deploy a job called
   `[dev yourname] my_jobs_as_code_job` to your workspace.
   You can find that job by opening your workspace and clicking on **Workflows**.

3. Similarly, to deploy a production copy, type:
   ```
   $ databricks bundle deploy --target prod
   ```

   Note that the default job from the template has a schedule that runs every day
   (defined in resources/my_jobs_as_code_job.py). The schedule
   is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes](
   https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)).

4. To run a job:
   ```
   $ databricks bundle run
   ```
@@ -0,0 +1,48 @@
# This is a Databricks asset bundle definition for my_jobs_as_code.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: my_jobs_as_code
  uuid: <UUID>

experimental:
  python:
    # Activate virtual environment before loading resources defined in Python.
    # If disabled, defaults to using the Python interpreter available in the current shell.
    venv_path: .venv
    # Functions called to load resources defined in Python. See resources/__init__.py
    resources:
      - "resources:load_resources"

artifacts:
  default:
    type: whl
    path: .
    # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.)
    # to ensure that changes to wheel package are picked up when used on all-purpose clusters
    build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build

include:
  - resources/*.yml

targets:
  dev:
    # The default target uses 'mode: development' to create a development copy.
    # - Deployed resources get prefixed with '[dev my_user_name]'
    # - Any job schedules and triggers are paused by default.
    # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
    mode: development
    default: true
    workspace:
      host: $DATABRICKS_URL

  prod:
    mode: production
    workspace:
      host: $DATABRICKS_URL
      # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy.
      root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target}
    permissions:
      - user_name: $USERNAME
        level: CAN_MANAGE
    run_as:
      user_name: $USERNAME
@@ -0,0 +1,22 @@
# Fixtures

This folder is reserved for fixtures, such as CSV files.

Below is an example of how to load fixtures as a data frame:

```
import pandas as pd
import os

def get_absolute_path(*relative_parts):
    if 'dbutils' in globals():
        base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore
        path = os.path.normpath(os.path.join(base_dir, *relative_parts))
        return path if path.startswith("/Workspace") else "/Workspace" + path
    else:
        return os.path.join(*relative_parts)

csv_file = get_absolute_path("..", "fixtures", "mycsv.csv")
df = pd.read_csv(csv_file)
display(df)
```
@@ -0,0 +1,49 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "my_jobs_as_code"
requires-python = ">=3.10"
description = "wheel file based on my_jobs_as_code"

# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
#
# Example:
# dependencies = [
#   "requests==x.y.z",
# ]
dependencies = [
]

# see setup.py
dynamic = ["version"]

[project.entry-points.packages]
main = "my_jobs_as_code.main:main"

[tool.setuptools]
py-modules = ["resources", "my_jobs_as_code"]

[tool.uv]
## Dependencies for local development
dev-dependencies = [
    "databricks-bundles==0.7.0",

    ## Add code completion support for DLT
    # "databricks-dlt",

    ## databricks-connect can be used to run parts of this project locally.
    ## See https://docs.databricks.com/dev-tools/databricks-connect.html.
    ##
    ## Uncomment line below to install a version of db-connect that corresponds to
    ## the Databricks Runtime version used for this project.
    # "databricks-connect>=15.4,<15.5",
]

override-dependencies = [
    # pyspark package conflicts with 'databricks-connect'
    "pyspark; sys_platform == 'never'",
]
@@ -0,0 +1,16 @@
from databricks.bundles.core import (
    Bundle,
    Resources,
    load_resources_from_current_package_module,
)


def load_resources(bundle: Bundle) -> Resources:
    """
    'load_resources' function is referenced in databricks.yml and is responsible for loading
    bundle resources defined in Python code. This function is called by Databricks CLI during
    bundle deployment. After deployment, this function is not used.
    """

    # the default implementation loads all Python files in 'resources' directory
    return load_resources_from_current_package_module()
@@ -0,0 +1,67 @@
from databricks.bundles.jobs import Job

"""
The main job for my_jobs_as_code.
"""


my_jobs_as_code_job = Job.from_dict(
    {
        "name": "my_jobs_as_code_job",
        "trigger": {
            # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
            "periodic": {
                "interval": 1,
                "unit": "DAYS",
            },
        },
        "email_notifications": {
            "on_failure": [
                "$USERNAME",
            ],
        },
        "tasks": [
            {
                "task_key": "notebook_task",
                "job_cluster_key": "job_cluster",
                "notebook_task": {
                    "notebook_path": "src/notebook.ipynb",
                },
            },
            {
                "task_key": "main_task",
                "depends_on": [
                    {
                        "task_key": "notebook_task",
                    },
                ],
                "job_cluster_key": "job_cluster",
                "python_wheel_task": {
                    "package_name": "my_jobs_as_code",
                    "entry_point": "main",
                },
                "libraries": [
                    # By default we just include the .whl file generated for the my_jobs_as_code package.
                    # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
                    # for more information on how to add other libraries.
                    {
                        "whl": "dist/*.whl",
                    },
                ],
            },
        ],
        "job_clusters": [
            {
                "job_cluster_key": "job_cluster",
                "new_cluster": {
                    "spark_version": "15.4.x-scala2.12",
                    "node_type_id": "i3.xlarge",
                    "autoscale": {
                        "min_workers": 1,
                        "max_workers": 4,
                    },
                },
            },
        ],
    }
)
@@ -0,0 +1,4 @@
# scratch

This folder is reserved for personal, exploratory notebooks.
By default these are not committed to Git, as 'scratch' is listed in .gitignore.
@@ -0,0 +1,18 @@
"""
setup.py configuration script describing how to build and package this project.

This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the my_jobs_as_code project.
"""

import os

from setuptools import setup

local_version = os.getenv("LOCAL_VERSION")
version = "0.0.1"

setup(
    version=f"{version}+{local_version}" if local_version else version,
)
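To make the local-version logic in the setup.py above concrete, here is a small illustration of the version string that results when the bundle's build command sets LOCAL_VERSION; the timestamp is an assumed example value, not taken from the diff.

```python
# Illustration only; the timestamp value below is assumed.
local_version = "20250114.093012"  # e.g. produced by LOCAL_VERSION=$(date +%Y%m%d.%H%M%S)
version = "0.0.1"
print(f"{version}+{local_version}" if local_version else version)
# -> 0.0.1+20250114.093012 (a PEP 440 local version identifier)
```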
@@ -0,0 +1,25 @@
from pyspark.sql import SparkSession, DataFrame


def get_taxis(spark: SparkSession) -> DataFrame:
    return spark.read.table("samples.nyctaxi.trips")


# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
    try:
        from databricks.connect import DatabricksSession

        return DatabricksSession.builder.getOrCreate()
    except ImportError:
        return SparkSession.builder.getOrCreate()


def main():
    get_taxis(get_spark()).show(5)


if __name__ == "__main__":
    main()
@@ -0,0 +1,75 @@
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {},
          "inputWidgets": {},
          "nuid": "<UUID>",
          "showTitle": false,
          "title": ""
        }
      },
      "source": [
        "# Default notebook\n",
        "\n",
        "This default notebook is executed using Databricks Workflows as defined in resources/my_jobs_as_code.job.yml."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {},
      "outputs": [],
      "source": [
        "%load_ext autoreload\n",
        "%autoreload 2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {
            "byteLimit": 2048000,
            "rowLimit": 10000
          },
          "inputWidgets": {},
          "nuid": "<UUID>",
          "showTitle": false,
          "title": ""
        }
      },
      "outputs": [],
      "source": [
        "from my_jobs_as_code import main\n",
        "\n",
        "main.get_taxis(spark).show(10)"
      ]
    }
  ],
  "metadata": {
    "application/vnd.databricks.v1+notebook": {
      "dashboards": [],
      "language": "python",
      "notebookMetadata": {
        "pythonIndentUnit": 2
      },
      "notebookName": "notebook",
      "widgets": {}
    },
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.11.4"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
@@ -0,0 +1,8 @@
from my_jobs_as_code.main import get_taxis, get_spark

# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml


def test_main():
    taxis = get_taxis(get_spark())
    assert taxis.count() > 5
@@ -0,0 +1,12 @@
trace $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output

cd output/my_jobs_as_code

# silence uv output because it's non-deterministic
uv sync 2> /dev/null

# remove version constraint because it always creates a warning on dev builds
cat databricks.yml | grep -v databricks_cli_version > databricks.yml.new
mv databricks.yml.new databricks.yml

trace $CLI bundle validate -t dev --output json | jq ".resources"
@@ -59,6 +59,11 @@ var nativeTemplates = []nativeTemplate{
        hidden:      true,
        description: "The default PyDABs template",
    },
    {
        name:        "experimental-jobs-as-code",
        hidden:      true,
        description: "Jobs as code template (experimental)",
    },
    {
        name:        customTemplate,
        description: "Bring your own template",
@@ -0,0 +1,28 @@
{
  "welcome_message": "\nWelcome to (EXPERIMENTAL) \"Jobs as code\" template for Databricks Asset Bundles!",
  "properties": {
    "project_name": {
      "type": "string",
      "default": "jobs_as_code_project",
      "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project",
      "order": 1,
      "pattern": "^[A-Za-z0-9_]+$",
      "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores."
    },
    "include_notebook": {
      "type": "string",
      "default": "yes",
      "enum": ["yes", "no"],
      "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'",
      "order": 2
    },
    "include_python": {
      "type": "string",
      "default": "yes",
      "enum": ["yes", "no"],
      "description": "Include a stub (sample) Python package in '{{.project_name}}/src'",
      "order": 3
    }
  },
  "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
}
@@ -0,0 +1,7 @@
{{define "latest_lts_dbr_version" -}}
    15.4.x-scala2.12
{{- end}}

{{define "latest_lts_db_connect_version_spec" -}}
    >=15.4,<15.5
{{- end}}
@@ -0,0 +1,30 @@
# Preamble

This file only contains template directives; it is skipped for the actual output.

{{skip "__preamble"}}

# TODO add DLT support, placeholder for now
{{$notDLT := true }}
{{$notNotebook := not (eq .include_notebook "yes")}}
{{$notPython := not (eq .include_python "yes")}}

{{if $notPython}}
  {{skip "{{.project_name}}/src/{{.project_name}}"}}
  {{skip "{{.project_name}}/tests/main_test.py"}}
{{end}}

{{if $notDLT}}
  {{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}}
  {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.py"}}
{{end}}

{{if $notNotebook}}
  {{skip "{{.project_name}}/src/notebook.ipynb"}}
{{end}}

{{if (and $notDLT $notNotebook $notPython)}}
  {{skip "{{.project_name}}/resources/{{.project_name}}_job.py"}}
{{else}}
  {{skip "{{.project_name}}/resources/.gitkeep"}}
{{end}}
libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore (vendored, new file, 8 lines)
@@ -0,0 +1,8 @@
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
scratch/**
!scratch/README.md
@@ -0,0 +1,60 @@
# {{.project_name}}

The '{{.project_name}}' project was generated by using the "Jobs as code" template.

## Prerequisites

1. Install Databricks CLI 0.238 or later.
   See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html).

2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/).
   We use uv to create a virtual environment and install the required dependencies.

3. Authenticate to your Databricks workspace if you have not done so already:
   ```
   $ databricks configure
   ```

4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
   https://docs.databricks.com/dev-tools/vscode-ext.html.
   {{- if (eq .include_python "yes") }} Or read the "getting started" documentation for
   **Databricks Connect** for instructions on running the included Python code from a different IDE.
   {{- end}}

5. For documentation on the Databricks Asset Bundles format used
   for this project, and for CI/CD configuration, see
   https://docs.databricks.com/dev-tools/bundles/index.html.

## Deploy and run jobs

1. Create a new virtual environment and install the required dependencies:
   ```
   $ uv sync
   ```

2. To deploy the bundle to the development target:
   ```
   $ databricks bundle deploy --target dev
   ```

   *(Note that "dev" is the default target, so the `--target` parameter is optional here.)*

   This deploys everything that's defined for this project.
   For example, the default template would deploy a job called
   `[dev yourname] {{.project_name}}_job` to your workspace.
   You can find that job by opening your workspace and clicking on **Workflows**.

3. Similarly, to deploy a production copy, type:
   ```
   $ databricks bundle deploy --target prod
   ```

   Note that the default job from the template has a schedule that runs every day
   (defined in resources/{{.project_name}}_job.py). The schedule
   is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes](
   https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)).

4. To run a job:
   ```
   $ databricks bundle run
   ```
@@ -0,0 +1,51 @@
# This is a Databricks asset bundle definition for {{.project_name}}.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: {{.project_name}}
  uuid: {{bundle_uuid}}
  databricks_cli_version: ">= 0.238.0"

experimental:
  python:
    # Activate virtual environment before loading resources defined in Python.
    # If disabled, defaults to using the Python interpreter available in the current shell.
    venv_path: .venv
    # Functions called to load resources defined in Python. See resources/__init__.py
    resources:
      - "resources:load_resources"

{{ if .include_python -}}
artifacts:
  default:
    type: whl
    path: .
    # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.)
    # to ensure that changes to wheel package are picked up when used on all-purpose clusters
    build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build

{{ end -}}
include:
  - resources/*.yml

targets:
  dev:
    # The default target uses 'mode: development' to create a development copy.
    # - Deployed resources get prefixed with '[dev my_user_name]'
    # - Any job schedules and triggers are paused by default.
    # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
    mode: development
    default: true
    workspace:
      host: {{workspace_host}}

  prod:
    mode: production
    workspace:
      host: {{workspace_host}}
      # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy.
      root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
    permissions:
      - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
        level: CAN_MANAGE
    run_as:
      {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
@@ -0,0 +1,27 @@
# Fixtures
{{- /*
We don't want to have too many README.md files, since they
stand out so much. But we do need to have a file here to make
sure the folder is added to Git.
*/}}

This folder is reserved for fixtures, such as CSV files.

Below is an example of how to load fixtures as a data frame:

```
import pandas as pd
import os

def get_absolute_path(*relative_parts):
    if 'dbutils' in globals():
        base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore
        path = os.path.normpath(os.path.join(base_dir, *relative_parts))
        return path if path.startswith("/Workspace") else "/Workspace" + path
    else:
        return os.path.join(*relative_parts)

csv_file = get_absolute_path("..", "fixtures", "mycsv.csv")
df = pd.read_csv(csv_file)
display(df)
```
@@ -0,0 +1,57 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "{{.project_name}}"
requires-python = ">=3.10"
description = "wheel file based on {{.project_name}}"

# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
#
# Example:
# dependencies = [
#   "requests==x.y.z",
# ]
dependencies = [
]

# see setup.py
dynamic = ["version"]

{{ if .include_python -}}
[project.entry-points.packages]
main = "{{.project_name}}.main:main"

{{ end -}}

[tool.setuptools]
{{ if .include_python -}}
py-modules = ["resources", "{{.project_name}}"]

{{ else }}
py-modules = ["resources"]

{{ end -}}
[tool.uv]
## Dependencies for local development
dev-dependencies = [
    "databricks-bundles==0.7.0",

    ## Add code completion support for DLT
    # "databricks-dlt",

    ## databricks-connect can be used to run parts of this project locally.
    ## See https://docs.databricks.com/dev-tools/databricks-connect.html.
    ##
    ## Uncomment line below to install a version of db-connect that corresponds to
    ## the Databricks Runtime version used for this project.
    # "databricks-connect{{template "latest_lts_db_connect_version_spec"}}",
]

override-dependencies = [
    # pyspark package conflicts with 'databricks-connect'
    "pyspark; sys_platform == 'never'",
]
@@ -0,0 +1,16 @@
from databricks.bundles.core import (
    Bundle,
    Resources,
    load_resources_from_current_package_module,
)


def load_resources(bundle: Bundle) -> Resources:
    """
    'load_resources' function is referenced in databricks.yml and is responsible for loading
    bundle resources defined in Python code. This function is called by Databricks CLI during
    bundle deployment. After deployment, this function is not used.
    """

    # the default implementation loads all Python files in 'resources' directory
    return load_resources_from_current_package_module()
@@ -0,0 +1,108 @@
{{$include_dlt := "no" -}}
from databricks.bundles.jobs import Job

"""
The main job for {{.project_name}}.

{{- /* Clarify what this job is for, for DLT-only users. */}}
{{if and (eq $include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}}
This job runs {{.project_name}}_pipeline on a schedule.
{{end -}}
"""


{{.project_name}}_job = Job.from_dict(
    {
        "name": "{{.project_name}}_job",
        "trigger": {
            # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
            "periodic": {
                "interval": 1,
                "unit": "DAYS",
            },
        },
        {{- if not is_service_principal}}
        "email_notifications": {
            "on_failure": [
                "{{user_name}}",
            ],
        },
        {{else}}
        {{- end -}}
        "tasks": [
            {{- if eq .include_notebook "yes" -}}
            {{- "\n " -}}
            {
                "task_key": "notebook_task",
                "job_cluster_key": "job_cluster",
                "notebook_task": {
                    "notebook_path": "src/notebook.ipynb",
                },
            },
            {{- end -}}
            {{- if (eq $include_dlt "yes") -}}
            {{- "\n " -}}
            {
                "task_key": "refresh_pipeline",
                {{- if (eq .include_notebook "yes" )}}
                "depends_on": [
                    {
                        "task_key": "notebook_task",
                    },
                ],
                {{- end}}
                "pipeline_task": {
                    {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}.pipeline.yml */}}
                    "pipeline_id": "${resources.pipelines.{{.project_name}}_pipeline.id}",
                },
            },
            {{- end -}}
            {{- if (eq .include_python "yes") -}}
            {{- "\n " -}}
            {
                "task_key": "main_task",
                {{- if (eq $include_dlt "yes") }}
                "depends_on": [
                    {
                        "task_key": "refresh_pipeline",
                    },
                ],
                {{- else if (eq .include_notebook "yes" )}}
                "depends_on": [
                    {
                        "task_key": "notebook_task",
                    },
                ],
                {{- end}}
                "job_cluster_key": "job_cluster",
                "python_wheel_task": {
                    "package_name": "{{.project_name}}",
                    "entry_point": "main",
                },
                "libraries": [
                    # By default we just include the .whl file generated for the {{.project_name}} package.
                    # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
                    # for more information on how to add other libraries.
                    {
                        "whl": "dist/*.whl",
                    },
                ],
            },
            {{- end -}}
            {{""}}
        ],
        "job_clusters": [
            {
                "job_cluster_key": "job_cluster",
                "new_cluster": {
                    "spark_version": "{{template "latest_lts_dbr_version"}}",
                    "node_type_id": "{{smallest_node_type}}",
                    "autoscale": {
                        "min_workers": 1,
                        "max_workers": 4,
                    },
                },
            },
        ],
    }
)
@@ -0,0 +1,24 @@
from databricks.bundles.pipelines import Pipeline

{{.project_name}}_pipeline = Pipeline.from_dict(
    {
        "name": "{{.project_name}}_pipeline",
        "target": "{{.project_name}}_${bundle.target}",
        {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}}
        ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
        "catalog": "catalog_name",
        {{- else}}
        "catalog": "{{default_catalog}}",
        {{- end}}
        "libraries": [
            {
                "notebook": {
                    "path": "src/dlt_pipeline.ipynb",
                },
            },
        ],
        "configuration": {
            "bundle.sourcePath": "${workspace.file_path}/src",
        },
    }
)
@@ -0,0 +1,4 @@
# scratch

This folder is reserved for personal, exploratory notebooks.
By default these are not committed to Git, as 'scratch' is listed in .gitignore.
@@ -0,0 +1,18 @@
"""
setup.py configuration script describing how to build and package this project.

This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the {{.project_name}} project.
"""

import os

from setuptools import setup

local_version = os.getenv("LOCAL_VERSION")
version = "0.0.1"

setup(
    version=f"{version}+{local_version}" if local_version else version,
)
@@ -0,0 +1,104 @@
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {},
          "inputWidgets": {},
          "nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec",
          "showTitle": false,
          "title": ""
        }
      },
      "source": [
        "# DLT pipeline\n",
        "\n",
        "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {},
          "inputWidgets": {},
          "nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f",
          "showTitle": false,
          "title": ""
        }
      },
      "outputs": [],
      "source": [
        {{- if (eq .include_python "yes") }}
        "# Import DLT and src/{{.project_name}}\n",
        "import dlt\n",
        "import sys\n",
        "\n",
        "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n",
        "from pyspark.sql.functions import expr\n",
        "from {{.project_name}} import main"
        {{else}}
        "import dlt\n",
        "from pyspark.sql.functions import expr\n",
        "from pyspark.sql import SparkSession\n",
        "\n",
        "spark = SparkSession.builder.getOrCreate()"
        {{end -}}
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {},
          "inputWidgets": {},
          "nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14",
          "showTitle": false,
          "title": ""
        }
      },
      "outputs": [],
      "source": [
        {{- if (eq .include_python "yes") }}
        "@dlt.view\n",
        "def taxi_raw():\n",
        "  return main.get_taxis(spark)\n",
        {{else}}
        "@dlt.view\n",
        "def taxi_raw():\n",
        "  return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n",
        {{end -}}
        "\n",
        "\n",
        "@dlt.table\n",
        "def filtered_taxis():\n",
        "  return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))"
      ]
    }
  ],
  "metadata": {
    "application/vnd.databricks.v1+notebook": {
      "dashboards": [],
      "language": "python",
      "notebookMetadata": {
        "pythonIndentUnit": 2
      },
      "notebookName": "dlt_pipeline",
      "widgets": {}
    },
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.11.4"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
@@ -0,0 +1,79 @@
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {},
          "inputWidgets": {},
          "nuid": "ee353e42-ff58-4955-9608-12865bd0950e",
          "showTitle": false,
          "title": ""
        }
      },
      "source": [
        "# Default notebook\n",
        "\n",
        "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {},
      "outputs": [],
      "source": [
        "%load_ext autoreload\n",
        "%autoreload 2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {
            "byteLimit": 2048000,
            "rowLimit": 10000
          },
          "inputWidgets": {},
          "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae",
          "showTitle": false,
          "title": ""
        }
      },
      "outputs": [],
      "source": [
        {{- if (eq .include_python "yes") }}
        "from {{.project_name}} import main\n",
        "\n",
        "main.get_taxis(spark).show(10)"
        {{else}}
        "spark.range(10)"
        {{end -}}
      ]
    }
  ],
  "metadata": {
    "application/vnd.databricks.v1+notebook": {
      "dashboards": [],
      "language": "python",
      "notebookMetadata": {
        "pythonIndentUnit": 2
      },
      "notebookName": "notebook",
      "widgets": {}
    },
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.11.4"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
@@ -0,0 +1,25 @@
from pyspark.sql import SparkSession, DataFrame


def get_taxis(spark: SparkSession) -> DataFrame:
    return spark.read.table("samples.nyctaxi.trips")


# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
    try:
        from databricks.connect import DatabricksSession

        return DatabricksSession.builder.getOrCreate()
    except ImportError:
        return SparkSession.builder.getOrCreate()


def main():
    get_taxis(get_spark()).show(5)


if __name__ == "__main__":
    main()
@@ -0,0 +1,8 @@
from {{.project_name}}.main import get_taxis, get_spark

# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml


def test_main():
    taxis = get_taxis(get_spark())
    assert taxis.count() > 5