mirror of https://github.com/databricks/cli.git
Add experimental-jobs-as-code template (#2177)
## Changes

Add the experimental-jobs-as-code template, which allows defining jobs in Python instead of YAML through the `databricks-bundles` PyPI package.

## Tests

Manual and acceptance tests.
Parent: 7034793d1d
Commit: 31c10c1b82
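For orientation, here is a minimal, illustrative sketch of the approach this template introduces (names below are made up; the actual generated files appear in the diff): a resource module builds a `Job` value with `databricks-bundles`, and `databricks.yml` points `experimental.python.resources` at a loader function such as the `load_resources` helper generated in `resources/__init__.py`.

```python
# resources/sample_job.py -- illustrative sketch only; see the generated template files below
from databricks.bundles.jobs import Job

# The job is declared as a plain Python value, built from the same dict shape used in YAML.
sample_job = Job.from_dict(
    {
        "name": "sample_job",
        "tasks": [
            {
                "task_key": "main_task",
                "python_wheel_task": {
                    "package_name": "sample_project",  # hypothetical package name
                    "entry_point": "main",
                },
            },
        ],
    }
)
```

The CLI discovers such values through the `resources: ["resources:load_resources"]` entry in `databricks.yml`, as shown in the generated `databricks.yml` and `resources/__init__.py` below.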
@@ -8,6 +8,7 @@ import (
 	"os"
 	"os/exec"
 	"path/filepath"
+	"regexp"
 	"runtime"
 	"slices"
 	"sort"
@@ -393,6 +394,16 @@ func CopyDir(src, dst string, inputs, outputs map[string]bool) error {
 }
 
 func ListDir(t *testing.T, src string) ([]string, error) {
+	// exclude folders in .gitignore from comparison
+	ignored := []string{
+		"\\.ruff_cache",
+		"\\.venv",
+		".*\\.egg-info",
+		"__pycache__",
+		// depends on uv version
+		"uv.lock",
+	}
+
 	var files []string
 	err := filepath.Walk(src, func(path string, info os.FileInfo, err error) error {
 		if err != nil {
@@ -400,7 +411,19 @@ func ListDir(t *testing.T, src string) ([]string, error) {
 		}
 
 		if info.IsDir() {
+			for _, ignoredFolder := range ignored {
+				if matched, _ := regexp.MatchString(ignoredFolder, info.Name()); matched {
+					return filepath.SkipDir
+				}
+			}
+
 			return nil
+		} else {
+			for _, ignoredFolder := range ignored {
+				if matched, _ := regexp.MatchString(ignoredFolder, info.Name()); matched {
+					return nil
+				}
+			}
 		}
 
 		relPath, err := filepath.Rel(src, path)
@@ -0,0 +1,5 @@
{
  "project_name": "my_jobs_as_code",
  "include_notebook": "yes",
  "include_python": "yes"
}
@@ -0,0 +1,85 @@

>>> $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output

Welcome to (EXPERIMENTAL) "Jobs as code" template for Databricks Asset Bundles!
Workspace to use (auto-detected, edit in 'my_jobs_as_code/databricks.yml'): $DATABRICKS_URL

✨ Your new project has been created in the 'my_jobs_as_code' directory!

Please refer to the README.md file for "getting started" instructions.
See also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html.

>>> $CLI bundle validate -t dev --output json
{
  "jobs": {
    "my_jobs_as_code_job": {
      "deployment": {
        "kind": "BUNDLE",
        "metadata_file_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/state/metadata.json"
      },
      "edit_mode": "UI_LOCKED",
      "email_notifications": {
        "on_failure": [
          "$USERNAME"
        ]
      },
      "format": "MULTI_TASK",
      "job_clusters": [
        {
          "job_cluster_key": "job_cluster",
          "new_cluster": {
            "autoscale": {
              "max_workers": 4,
              "min_workers": 1
            },
            "node_type_id": "i3.xlarge",
            "spark_version": "15.4.x-scala2.12"
          }
        }
      ],
      "max_concurrent_runs": 4,
      "name": "[dev $USERNAME] my_jobs_as_code_job",
      "permissions": [],
      "queue": {
        "enabled": true
      },
      "tags": {
        "dev": "$USERNAME"
      },
      "tasks": [
        {
          "job_cluster_key": "job_cluster",
          "notebook_task": {
            "notebook_path": "/Workspace/Users/$USERNAME/.bundle/my_jobs_as_code/dev/files/src/notebook"
          },
          "task_key": "notebook_task"
        },
        {
          "depends_on": [
            {
              "task_key": "notebook_task"
            }
          ],
          "job_cluster_key": "job_cluster",
          "libraries": [
            {
              "whl": "dist/*.whl"
            }
          ],
          "python_wheel_task": {
            "entry_point": "main",
            "package_name": "my_jobs_as_code"
          },
          "task_key": "main_task"
        }
      ],
      "trigger": {
        "pause_status": "PAUSED",
        "periodic": {
          "interval": 1,
          "unit": "DAYS"
        }
      }
    }
  }
}
acceptance/bundle/templates/experimental-jobs-as-code/output/my_jobs_as_code/.gitignore (vendored, new file, 8 lines)
@@ -0,0 +1,8 @@
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
scratch/**
!scratch/README.md
@@ -0,0 +1,58 @@
# my_jobs_as_code

The 'my_jobs_as_code' project was generated by using the "Jobs as code" template.

## Prerequisites

1. Install Databricks CLI 0.238 or later.
   See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html).

2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/).
   We use uv to create a virtual environment and install the required dependencies.

3. Authenticate to your Databricks workspace if you have not done so already:
   ```
   $ databricks configure
   ```

4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
   https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for
   **Databricks Connect** for instructions on running the included Python code from a different IDE.

5. For documentation on the Databricks Asset Bundles format used
   for this project, and for CI/CD configuration, see
   https://docs.databricks.com/dev-tools/bundles/index.html.

## Deploy and run jobs

1. Create a new virtual environment and install the required dependencies:
   ```
   $ uv sync
   ```

2. To deploy the bundle to the development target:
   ```
   $ databricks bundle deploy --target dev
   ```

   *(Note that "dev" is the default target, so the `--target` parameter is optional here.)*

   This deploys everything that's defined for this project.
   For example, the default template would deploy a job called
   `[dev yourname] my_jobs_as_code_job` to your workspace.
   You can find that job by opening your workspace and clicking on **Workflows**.

3. Similarly, to deploy a production copy, type:
   ```
   $ databricks bundle deploy --target prod
   ```

   Note that the default job from the template has a schedule that runs every day
   (defined in resources/my_jobs_as_code_job.py). The schedule
   is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes](
   https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)).

4. To run a job:
   ```
   $ databricks bundle run
   ```
@@ -0,0 +1,48 @@
# This is a Databricks asset bundle definition for my_jobs_as_code.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: my_jobs_as_code
  uuid: <UUID>

experimental:
  python:
    # Activate virtual environment before loading resources defined in Python.
    # If disabled, defaults to using the Python interpreter available in the current shell.
    venv_path: .venv
    # Functions called to load resources defined in Python. See resources/__init__.py
    resources:
      - "resources:load_resources"

artifacts:
  default:
    type: whl
    path: .
    # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.)
    # to ensure that changes to wheel package are picked up when used on all-purpose clusters
    build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build

include:
  - resources/*.yml

targets:
  dev:
    # The default target uses 'mode: development' to create a development copy.
    # - Deployed resources get prefixed with '[dev my_user_name]'
    # - Any job schedules and triggers are paused by default.
    # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
    mode: development
    default: true
    workspace:
      host: $DATABRICKS_URL

  prod:
    mode: production
    workspace:
      host: $DATABRICKS_URL
      # We explicitly specify /Workspace/Users/$USERNAME to make sure we only have a single copy.
      root_path: /Workspace/Users/$USERNAME/.bundle/${bundle.name}/${bundle.target}
    permissions:
      - user_name: $USERNAME
        level: CAN_MANAGE
    run_as:
      user_name: $USERNAME
@@ -0,0 +1,22 @@
# Fixtures

This folder is reserved for fixtures, such as CSV files.

Below is an example of how to load fixtures as a data frame:

```
import pandas as pd
import os

def get_absolute_path(*relative_parts):
    if 'dbutils' in globals():
        base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore
        path = os.path.normpath(os.path.join(base_dir, *relative_parts))
        return path if path.startswith("/Workspace") else "/Workspace" + path
    else:
        return os.path.join(*relative_parts)

csv_file = get_absolute_path("..", "fixtures", "mycsv.csv")
df = pd.read_csv(csv_file)
display(df)
```
@@ -0,0 +1,49 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "my_jobs_as_code"
requires-python = ">=3.10"
description = "wheel file based on my_jobs_as_code"

# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
#
# Example:
# dependencies = [
#   "requests==x.y.z",
# ]
dependencies = [
]

# see setup.py
dynamic = ["version"]

[project.entry-points.packages]
main = "my_jobs_as_code.main:main"

[tool.setuptools]
py-modules = ["resources", "my_jobs_as_code"]

[tool.uv]
## Dependencies for local development
dev-dependencies = [
    "databricks-bundles==0.7.0",

    ## Add code completion support for DLT
    # "databricks-dlt",

    ## databricks-connect can be used to run parts of this project locally.
    ## See https://docs.databricks.com/dev-tools/databricks-connect.html.
    ##
    ## Uncomment line below to install a version of db-connect that corresponds to
    ## the Databricks Runtime version used for this project.
    # "databricks-connect>=15.4,<15.5",
]

override-dependencies = [
    # pyspark package conflicts with 'databricks-connect'
    "pyspark; sys_platform == 'never'",
]
@@ -0,0 +1,16 @@
from databricks.bundles.core import (
    Bundle,
    Resources,
    load_resources_from_current_package_module,
)


def load_resources(bundle: Bundle) -> Resources:
    """
    'load_resources' function is referenced in databricks.yml and is responsible for loading
    bundle resources defined in Python code. This function is called by Databricks CLI during
    bundle deployment. After deployment, this function is not used.
    """

    # the default implementation loads all Python files in 'resources' directory
    return load_resources_from_current_package_module()
@@ -0,0 +1,67 @@
from databricks.bundles.jobs import Job

"""
The main job for my_jobs_as_code.
"""


my_jobs_as_code_job = Job.from_dict(
    {
        "name": "my_jobs_as_code_job",
        "trigger": {
            # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
            "periodic": {
                "interval": 1,
                "unit": "DAYS",
            },
        },
        "email_notifications": {
            "on_failure": [
                "$USERNAME",
            ],
        },
        "tasks": [
            {
                "task_key": "notebook_task",
                "job_cluster_key": "job_cluster",
                "notebook_task": {
                    "notebook_path": "src/notebook.ipynb",
                },
            },
            {
                "task_key": "main_task",
                "depends_on": [
                    {
                        "task_key": "notebook_task",
                    },
                ],
                "job_cluster_key": "job_cluster",
                "python_wheel_task": {
                    "package_name": "my_jobs_as_code",
                    "entry_point": "main",
                },
                "libraries": [
                    # By default we just include the .whl file generated for the my_jobs_as_code package.
                    # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
                    # for more information on how to add other libraries.
                    {
                        "whl": "dist/*.whl",
                    },
                ],
            },
        ],
        "job_clusters": [
            {
                "job_cluster_key": "job_cluster",
                "new_cluster": {
                    "spark_version": "15.4.x-scala2.12",
                    "node_type_id": "i3.xlarge",
                    "autoscale": {
                        "min_workers": 1,
                        "max_workers": 4,
                    },
                },
            },
        ],
    }
)
@@ -0,0 +1,4 @@
# scratch

This folder is reserved for personal, exploratory notebooks.
By default these are not committed to Git, as 'scratch' is listed in .gitignore.
@@ -0,0 +1,18 @@
"""
setup.py configuration script describing how to build and package this project.

This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the my_jobs_as_code project.
"""

import os

from setuptools import setup

local_version = os.getenv("LOCAL_VERSION")
version = "0.0.1"

setup(
    version=f"{version}+{local_version}" if local_version else version,
)
@@ -0,0 +1,25 @@
from pyspark.sql import SparkSession, DataFrame


def get_taxis(spark: SparkSession) -> DataFrame:
    return spark.read.table("samples.nyctaxi.trips")


# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
    try:
        from databricks.connect import DatabricksSession

        return DatabricksSession.builder.getOrCreate()
    except ImportError:
        return SparkSession.builder.getOrCreate()


def main():
    get_taxis(get_spark()).show(5)


if __name__ == "__main__":
    main()
@@ -0,0 +1,75 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "<UUID>",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "# Default notebook\n",
    "\n",
    "This default notebook is executed using Databricks Workflows as defined in resources/my_jobs_as_code.job.yml."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "<UUID>",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "from my_jobs_as_code import main\n",
    "\n",
    "main.get_taxis(spark).show(10)"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
   "notebookName": "notebook",
   "widgets": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
@@ -0,0 +1,8 @@
from my_jobs_as_code.main import get_taxis, get_spark

# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml


def test_main():
    taxis = get_taxis(get_spark())
    assert taxis.count() > 5
@@ -0,0 +1,12 @@
trace $CLI bundle init experimental-jobs-as-code --config-file ./input.json --output-dir output

cd output/my_jobs_as_code

# silence uv output because it's non-deterministic
uv sync 2> /dev/null

# remove version constraint because it always creates a warning on dev builds
cat databricks.yml | grep -v databricks_cli_version > databricks.yml.new
mv databricks.yml.new databricks.yml

trace $CLI bundle validate -t dev --output json | jq ".resources"
@@ -59,6 +59,11 @@ var nativeTemplates = []nativeTemplate{
 		hidden:      true,
 		description: "The default PyDABs template",
 	},
+	{
+		name:        "experimental-jobs-as-code",
+		hidden:      true,
+		description: "Jobs as code template (experimental)",
+	},
 	{
 		name:        customTemplate,
 		description: "Bring your own template",
@@ -0,0 +1,28 @@
{
  "welcome_message": "\nWelcome to (EXPERIMENTAL) \"Jobs as code\" template for Databricks Asset Bundles!",
  "properties": {
    "project_name": {
      "type": "string",
      "default": "jobs_as_code_project",
      "description": "Please provide the following details to tailor the template to your preferences.\n\nUnique name for this project",
      "order": 1,
      "pattern": "^[A-Za-z0-9_]+$",
      "pattern_match_failure_message": "Name must consist of letters, numbers, and underscores."
    },
    "include_notebook": {
      "type": "string",
      "default": "yes",
      "enum": ["yes", "no"],
      "description": "Include a stub (sample) notebook in '{{.project_name}}{{path_separator}}src'",
      "order": 2
    },
    "include_python": {
      "type": "string",
      "default": "yes",
      "enum": ["yes", "no"],
      "description": "Include a stub (sample) Python package in '{{.project_name}}/src'",
      "order": 3
    }
  },
  "success_message": "Workspace to use (auto-detected, edit in '{{.project_name}}/databricks.yml'): {{workspace_host}}\n\n✨ Your new project has been created in the '{{.project_name}}' directory!\n\nPlease refer to the README.md file for \"getting started\" instructions.\nSee also the documentation at https://docs.databricks.com/dev-tools/bundles/index.html."
}
@@ -0,0 +1,7 @@
{{define "latest_lts_dbr_version" -}}
15.4.x-scala2.12
{{- end}}

{{define "latest_lts_db_connect_version_spec" -}}
>=15.4,<15.5
{{- end}}
@@ -0,0 +1,30 @@
# Preamble

This file only contains template directives; it is skipped for the actual output.

{{skip "__preamble"}}

# TODO add DLT support, placeholder for now
{{$notDLT := true }}
{{$notNotebook := not (eq .include_notebook "yes")}}
{{$notPython := not (eq .include_python "yes")}}

{{if $notPython}}
{{skip "{{.project_name}}/src/{{.project_name}}"}}
{{skip "{{.project_name}}/tests/main_test.py"}}
{{end}}

{{if $notDLT}}
{{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}}
{{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.py"}}
{{end}}

{{if $notNotebook}}
{{skip "{{.project_name}}/src/notebook.ipynb"}}
{{end}}

{{if (and $notDLT $notNotebook $notPython)}}
{{skip "{{.project_name}}/resources/{{.project_name}}_job.py"}}
{{else}}
{{skip "{{.project_name}}/resources/.gitkeep"}}
{{end}}
libs/template/templates/experimental-jobs-as-code/template/{{.project_name}}/.gitignore (vendored, new file, 8 lines)
@@ -0,0 +1,8 @@
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
scratch/**
!scratch/README.md
@@ -0,0 +1,60 @@
# {{.project_name}}

The '{{.project_name}}' project was generated by using the "Jobs as code" template.

## Prerequisites

1. Install Databricks CLI 0.238 or later.
   See [Install or update the Databricks CLI](https://docs.databricks.com/en/dev-tools/cli/install.html).

2. Install uv. See [Installing uv](https://docs.astral.sh/uv/getting-started/installation/).
   We use uv to create a virtual environment and install the required dependencies.

3. Authenticate to your Databricks workspace if you have not done so already:
   ```
   $ databricks configure
   ```

4. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
   https://docs.databricks.com/dev-tools/vscode-ext.html.
{{- if (eq .include_python "yes") }} Or read the "getting started" documentation for
   **Databricks Connect** for instructions on running the included Python code from a different IDE.
{{- end}}

5. For documentation on the Databricks Asset Bundles format used
   for this project, and for CI/CD configuration, see
   https://docs.databricks.com/dev-tools/bundles/index.html.

## Deploy and run jobs

1. Create a new virtual environment and install the required dependencies:
   ```
   $ uv sync
   ```

2. To deploy the bundle to the development target:
   ```
   $ databricks bundle deploy --target dev
   ```

   *(Note that "dev" is the default target, so the `--target` parameter is optional here.)*

   This deploys everything that's defined for this project.
   For example, the default template would deploy a job called
   `[dev yourname] {{.project_name}}_job` to your workspace.
   You can find that job by opening your workspace and clicking on **Workflows**.

3. Similarly, to deploy a production copy, type:
   ```
   $ databricks bundle deploy --target prod
   ```

   Note that the default job from the template has a schedule that runs every day
   (defined in resources/{{.project_name}}_job.py). The schedule
   is paused when deploying in development mode (see [Databricks Asset Bundle deployment modes](
   https://docs.databricks.com/dev-tools/bundles/deployment-modes.html)).

4. To run a job:
   ```
   $ databricks bundle run
   ```
@@ -0,0 +1,51 @@
# This is a Databricks asset bundle definition for {{.project_name}}.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: {{.project_name}}
  uuid: {{bundle_uuid}}
  databricks_cli_version: ">= 0.238.0"

experimental:
  python:
    # Activate virtual environment before loading resources defined in Python.
    # If disabled, defaults to using the Python interpreter available in the current shell.
    venv_path: .venv
    # Functions called to load resources defined in Python. See resources/__init__.py
    resources:
      - "resources:load_resources"

{{ if .include_python -}}
artifacts:
  default:
    type: whl
    path: .
    # We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.)
    # to ensure that changes to wheel package are picked up when used on all-purpose clusters
    build: LOCAL_VERSION=$(date +%Y%m%d.%H%M%S) uv build

{{ end -}}
include:
  - resources/*.yml

targets:
  dev:
    # The default target uses 'mode: development' to create a development copy.
    # - Deployed resources get prefixed with '[dev my_user_name]'
    # - Any job schedules and triggers are paused by default.
    # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
    mode: development
    default: true
    workspace:
      host: {{workspace_host}}

  prod:
    mode: production
    workspace:
      host: {{workspace_host}}
      # We explicitly specify /Workspace/Users/{{user_name}} to make sure we only have a single copy.
      root_path: /Workspace/Users/{{user_name}}/.bundle/${bundle.name}/${bundle.target}
    permissions:
      - {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
        level: CAN_MANAGE
    run_as:
      {{if is_service_principal}}service_principal{{else}}user{{end}}_name: {{user_name}}
@@ -0,0 +1,27 @@
# Fixtures
{{- /*
We don't want to have too many README.md files, since they
stand out so much. But we do need to have a file here to make
sure the folder is added to Git.
*/}}

This folder is reserved for fixtures, such as CSV files.

Below is an example of how to load fixtures as a data frame:

```
import pandas as pd
import os

def get_absolute_path(*relative_parts):
    if 'dbutils' in globals():
        base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore
        path = os.path.normpath(os.path.join(base_dir, *relative_parts))
        return path if path.startswith("/Workspace") else "/Workspace" + path
    else:
        return os.path.join(*relative_parts)

csv_file = get_absolute_path("..", "fixtures", "mycsv.csv")
df = pd.read_csv(csv_file)
display(df)
```
@@ -0,0 +1,57 @@
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "{{.project_name}}"
requires-python = ">=3.10"
description = "wheel file based on {{.project_name}}"

# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
#
# Example:
# dependencies = [
#   "requests==x.y.z",
# ]
dependencies = [
]

# see setup.py
dynamic = ["version"]

{{ if .include_python -}}
[project.entry-points.packages]
main = "{{.project_name}}.main:main"

{{ end -}}

[tool.setuptools]
{{ if .include_python -}}
py-modules = ["resources", "{{.project_name}}"]

{{ else }}
py-modules = ["resources"]

{{ end -}}
[tool.uv]
## Dependencies for local development
dev-dependencies = [
    "databricks-bundles==0.7.0",

    ## Add code completion support for DLT
    # "databricks-dlt",

    ## databricks-connect can be used to run parts of this project locally.
    ## See https://docs.databricks.com/dev-tools/databricks-connect.html.
    ##
    ## Uncomment line below to install a version of db-connect that corresponds to
    ## the Databricks Runtime version used for this project.
    # "databricks-connect{{template "latest_lts_db_connect_version_spec"}}",
]

override-dependencies = [
    # pyspark package conflicts with 'databricks-connect'
    "pyspark; sys_platform == 'never'",
]
@@ -0,0 +1,16 @@
from databricks.bundles.core import (
    Bundle,
    Resources,
    load_resources_from_current_package_module,
)


def load_resources(bundle: Bundle) -> Resources:
    """
    'load_resources' function is referenced in databricks.yml and is responsible for loading
    bundle resources defined in Python code. This function is called by Databricks CLI during
    bundle deployment. After deployment, this function is not used.
    """

    # the default implementation loads all Python files in 'resources' directory
    return load_resources_from_current_package_module()
@@ -0,0 +1,108 @@
{{$include_dlt := "no" -}}
from databricks.bundles.jobs import Job

"""
The main job for {{.project_name}}.

{{- /* Clarify what this job is for for DLT-only users. */}}
{{if and (eq $include_dlt "yes") (and (eq .include_notebook "no") (eq .include_python "no")) -}}
This job runs {{.project_name}}_pipeline on a schedule.
{{end -}}
"""


{{.project_name}}_job = Job.from_dict(
    {
        "name": "{{.project_name}}_job",
        "trigger": {
            # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
            "periodic": {
                "interval": 1,
                "unit": "DAYS",
            },
        },
        {{- if not is_service_principal}}
        "email_notifications": {
            "on_failure": [
                "{{user_name}}",
            ],
        },
        {{else}}
        {{- end -}}
        "tasks": [
            {{- if eq .include_notebook "yes" -}}
            {{- "\n " -}}
            {
                "task_key": "notebook_task",
                "job_cluster_key": "job_cluster",
                "notebook_task": {
                    "notebook_path": "src/notebook.ipynb",
                },
            },
            {{- end -}}
            {{- if (eq $include_dlt "yes") -}}
            {{- "\n " -}}
            {
                "task_key": "refresh_pipeline",
                {{- if (eq .include_notebook "yes" )}}
                "depends_on": [
                    {
                        "task_key": "notebook_task",
                    },
                ],
                {{- end}}
                "pipeline_task": {
                    {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}.pipeline.yml */}}
                    "pipeline_id": "${resources.pipelines.{{.project_name}}_pipeline.id}",
                },
            },
            {{- end -}}
            {{- if (eq .include_python "yes") -}}
            {{- "\n " -}}
            {
                "task_key": "main_task",
                {{- if (eq $include_dlt "yes") }}
                "depends_on": [
                    {
                        "task_key": "refresh_pipeline",
                    },
                ],
                {{- else if (eq .include_notebook "yes" )}}
                "depends_on": [
                    {
                        "task_key": "notebook_task",
                    },
                ],
                {{- end}}
                "job_cluster_key": "job_cluster",
                "python_wheel_task": {
                    "package_name": "{{.project_name}}",
                    "entry_point": "main",
                },
                "libraries": [
                    # By default we just include the .whl file generated for the {{.project_name}} package.
                    # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
                    # for more information on how to add other libraries.
                    {
                        "whl": "dist/*.whl",
                    },
                ],
            },
            {{- end -}}
            {{""}}
        ],
        "job_clusters": [
            {
                "job_cluster_key": "job_cluster",
                "new_cluster": {
                    "spark_version": "{{template "latest_lts_dbr_version"}}",
                    "node_type_id": "{{smallest_node_type}}",
                    "autoscale": {
                        "min_workers": 1,
                        "max_workers": 4,
                    },
                },
            },
        ],
    }
)
@@ -0,0 +1,24 @@
from databricks.bundles.pipelines import Pipeline

{{.project_name}}_pipeline = Pipeline.from_dict(
    {
        "name": "{{.project_name}}_pipeline",
        "target": "{{.project_name}}_${bundle.target}",
        {{- if or (eq default_catalog "") (eq default_catalog "hive_metastore")}}
        ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
        "catalog": "catalog_name",
        {{- else}}
        "catalog": "{{default_catalog}}",
        {{- end}}
        "libraries": [
            {
                "notebook": {
                    "path": "src/dlt_pipeline.ipynb",
                },
            },
        ],
        "configuration": {
            "bundle.sourcePath": "${workspace.file_path}/src",
        },
    }
)
@@ -0,0 +1,4 @@
# scratch

This folder is reserved for personal, exploratory notebooks.
By default these are not committed to Git, as 'scratch' is listed in .gitignore.
@@ -0,0 +1,18 @@
"""
setup.py configuration script describing how to build and package this project.

This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the {{.project_name}} project.
"""

import os

from setuptools import setup

local_version = os.getenv("LOCAL_VERSION")
version = "0.0.1"

setup(
    version=f"{version}+{local_version}" if local_version else version,
)
@@ -0,0 +1,104 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "# DLT pipeline\n",
    "\n",
    "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}.pipeline.yml."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    {{- if (eq .include_python "yes") }}
    "# Import DLT and src/{{.project_name}}\n",
    "import dlt\n",
    "import sys\n",
    "\n",
    "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n",
    "from pyspark.sql.functions import expr\n",
    "from {{.project_name}} import main"
    {{else}}
    "import dlt\n",
    "from pyspark.sql.functions import expr\n",
    "from pyspark.sql import SparkSession\n",
    "\n",
    "spark = SparkSession.builder.getOrCreate()"
    {{end -}}
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    {{- if (eq .include_python "yes") }}
    "@dlt.view\n",
    "def taxi_raw():\n",
    "    return main.get_taxis(spark)\n",
    {{else}}
    "@dlt.view\n",
    "def taxi_raw():\n",
    "    return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n",
    {{end -}}
    "\n",
    "\n",
    "@dlt.table\n",
    "def filtered_taxis():\n",
    "    return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
   "notebookName": "dlt_pipeline",
   "widgets": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
@@ -0,0 +1,79 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "ee353e42-ff58-4955-9608-12865bd0950e",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "# Default notebook\n",
    "\n",
    "This default notebook is executed using Databricks Workflows as defined in resources/{{.project_name}}.job.yml."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "6bca260b-13d1-448f-8082-30b60a85c9ae",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    {{- if (eq .include_python "yes") }}
    "from {{.project_name}} import main\n",
    "\n",
    "main.get_taxis(spark).show(10)"
    {{else}}
    "spark.range(10)"
    {{end -}}
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
   "notebookName": "notebook",
   "widgets": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
@@ -0,0 +1,25 @@
from pyspark.sql import SparkSession, DataFrame


def get_taxis(spark: SparkSession) -> DataFrame:
    return spark.read.table("samples.nyctaxi.trips")


# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
    try:
        from databricks.connect import DatabricksSession

        return DatabricksSession.builder.getOrCreate()
    except ImportError:
        return SparkSession.builder.getOrCreate()


def main():
    get_taxis(get_spark()).show(5)


if __name__ == "__main__":
    main()
@@ -0,0 +1,8 @@
from {{.project_name}}.main import get_taxis, get_spark

# running tests requires installing databricks-connect, e.g. by uncommenting it in pyproject.toml


def test_main():
    taxis = get_taxis(get_spark())
    assert taxis.count() > 5