mirror of https://github.com/databricks/cli.git
acc: Include full output for default-python/classic (#2391)
## Tests

Include full output of default-python/classic so it can be used as a base for diffs in cloud tests (#2383).
parent 81606cfcbc
commit 13ac52391d
@@ -0,0 +1,3 @@
# Typings for Pylance in Visual Studio Code
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
from databricks.sdk.runtime import *
@@ -0,0 +1,7 @@
{
    "recommendations": [
        "databricks.databricks",
        "ms-python.vscode-pylance",
        "redhat.vscode-yaml"
    ]
}
@@ -0,0 +1,16 @@
{
    "python.analysis.stubPath": ".vscode",
    "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
    "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
    "python.testing.pytestArgs": [
        "."
    ],
    "python.testing.unittestEnabled": false,
    "python.testing.pytestEnabled": true,
    "python.analysis.extraPaths": ["src"],
    "files.exclude": {
        "**/*.egg-info": true,
        "**/__pycache__": true,
        ".pytest_cache": true,
    },
}
@@ -0,0 +1,49 @@
# my_default_python

The 'my_default_python' project was generated by using the default-python template.

## Getting started

1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html

2. Authenticate to your Databricks workspace, if you have not done so already:
   ```
   $ databricks configure
   ```

3. To deploy a development copy of this project, type:
   ```
   $ databricks bundle deploy --target dev
   ```
   (Note that "dev" is the default target, so the `--target` parameter
   is optional here.)

   This deploys everything that's defined for this project.
   For example, the default template would deploy a job called
   `[dev yourname] my_default_python_job` to your workspace.
   You can find that job by opening your workspace and clicking on **Workflows**.

4. Similarly, to deploy a production copy, type:
   ```
   $ databricks bundle deploy --target prod
   ```

   Note that the default job from the template has a schedule that runs every day
   (defined in resources/my_default_python.job.yml). The schedule
   is paused when deploying in development mode (see
   https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).

5. To run a job or pipeline, use the "run" command:
   ```
   $ databricks bundle run
   ```
6. Optionally, install the Databricks extension for Visual Studio Code for local development from
   https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your
   virtual environment and set up Databricks Connect for running unit tests locally.
   When not using these tools, consult your development environment's documentation
   and/or the documentation for Databricks Connect for manually setting up your environment
   (https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html).

7. For documentation on the Databricks asset bundles format used
   for this project, and for CI/CD configuration, see
   https://docs.databricks.com/dev-tools/bundles/index.html.
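As a quick illustration of step 6 in the README above (an editorial sketch, not part of this commit): once `databricks-connect` from requirements-dev.txt is installed and `databricks configure` has been run, a minimal local check could look like the snippet below. It uses the same `DatabricksSession` API that `src/my_default_python/main.py` falls back on, and reads the same sample table as the template's `get_taxis()`.

```
# Minimal local smoke test for a Databricks Connect setup (illustrative sketch).
# Assumes databricks-connect is installed and workspace authentication is configured.
from databricks.connect import DatabricksSession

spark = DatabricksSession.builder.getOrCreate()

# Prints a handful of rows if the remote connection works; the unit tests under
# tests/ rely on the same connection via get_spark().
spark.read.table("samples.nyctaxi.trips").show(5)
```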
@@ -0,0 +1,29 @@
# This is a Databricks asset bundle definition for my_default_python.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: my_default_python
  uuid: [UUID]

include:
  - resources/*.yml

targets:
  dev:
    # The default target uses 'mode: development' to create a development copy.
    # - Deployed resources get prefixed with '[dev my_user_name]'
    # - Any job schedules and triggers are paused by default.
    # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
    mode: development
    default: true
    workspace:
      host: [DATABRICKS_URL]

  prod:
    mode: production
    workspace:
      host: [DATABRICKS_URL]
      # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy.
      root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target}
    permissions:
      - user_name: [USERNAME]
        level: CAN_MANAGE
@@ -0,0 +1,22 @@
# Fixtures

This folder is reserved for fixtures, such as CSV files.

Below is an example of how to load fixtures as a data frame:

```
import pandas as pd
import os

def get_absolute_path(*relative_parts):
    if 'dbutils' in globals():
        base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get())  # type: ignore
        path = os.path.normpath(os.path.join(base_dir, *relative_parts))
        return path if path.startswith("/Workspace") else "/Workspace" + path
    else:
        return os.path.join(*relative_parts)

csv_file = get_absolute_path("..", "fixtures", "mycsv.csv")
df = pd.read_csv(csv_file)
display(df)
```
@@ -0,0 +1,8 @@
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
scratch/**
!scratch/README.md
@@ -0,0 +1,3 @@
[pytest]
testpaths = tests
pythonpath = src
@@ -0,0 +1,29 @@
## requirements-dev.txt: dependencies for local development.
##
## For defining dependencies used by jobs in Databricks Workflows, see
## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html

## Add code completion support for DLT
databricks-dlt

## pytest is the default package used for testing
pytest

## Dependencies for building wheel files
setuptools
wheel

## databricks-connect can be used to run parts of this project locally.
## See https://docs.databricks.com/dev-tools/databricks-connect.html.
##
## databricks-connect is automatically installed if you're using the Databricks
## extension for Visual Studio Code
## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html).
##
## To manually install databricks-connect, either follow the instructions
## at https://docs.databricks.com/dev-tools/databricks-connect.html
## to install the package system-wide, or uncomment the line below to install a
## version of db-connect that corresponds to the Databricks Runtime version used
## for this project.
#
# databricks-connect>=15.4,<15.5
@@ -0,0 +1,50 @@
# The main job for my_default_python.
resources:
  jobs:
    my_default_python_job:
      name: my_default_python_job

      trigger:
        # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
        periodic:
          interval: 1
          unit: DAYS

      email_notifications:
        on_failure:
          - [USERNAME]

      tasks:
        - task_key: notebook_task
          job_cluster_key: job_cluster
          notebook_task:
            notebook_path: ../src/notebook.ipynb

        - task_key: refresh_pipeline
          depends_on:
            - task_key: notebook_task
          pipeline_task:
            pipeline_id: ${resources.pipelines.my_default_python_pipeline.id}

        - task_key: main_task
          depends_on:
            - task_key: refresh_pipeline
          job_cluster_key: job_cluster
          python_wheel_task:
            package_name: my_default_python
            entry_point: main
          libraries:
            # By default we just include the .whl file generated for the my_default_python package.
            # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
            # for more information on how to add other libraries.
            - whl: ../dist/*.whl

      job_clusters:
        - job_cluster_key: job_cluster
          new_cluster:
            spark_version: 15.4.x-scala2.12
            node_type_id: i3.xlarge
            data_security_mode: SINGLE_USER
            autoscale:
              min_workers: 1
              max_workers: 4
@@ -0,0 +1,14 @@
# The main pipeline for my_default_python
resources:
  pipelines:
    my_default_python_pipeline:
      name: my_default_python_pipeline
      ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
      # catalog: catalog_name
      target: my_default_python_${bundle.target}
      libraries:
        - notebook:
            path: ../src/dlt_pipeline.ipynb

      configuration:
        bundle.sourcePath: ${workspace.file_path}/src
@@ -0,0 +1,4 @@
# scratch

This folder is reserved for personal, exploratory notebooks.
By default these are not committed to Git, as 'scratch' is listed in .gitignore.
@@ -0,0 +1,61 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "[UUID]",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "sys.path.append(\"../src\")\n",
    "from my_default_python import main\n",
    "\n",
    "main.get_taxis(spark).show(10)"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
   "notebookName": "ipynb-notebook",
   "widgets": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
@@ -0,0 +1,41 @@
"""
setup.py configuration script describing how to build and package this project.

This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the my_default_python project.
"""

from setuptools import setup, find_packages

import sys

sys.path.append("./src")

import datetime
import my_default_python

local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")

setup(
    name="my_default_python",
    # We use the timestamp as a local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers)
    # to ensure that changes to the wheel package are picked up when it is used on all-purpose clusters.
    version=my_default_python.__version__ + "+" + local_version,
    url="https://databricks.com",
    author="[USERNAME]",
    description="wheel file based on my_default_python/src",
    packages=find_packages(where="./src"),
    package_dir={"": "src"},
    entry_points={
        "packages": [
            "main=my_default_python.main:main",
        ],
    },
    install_requires=[
        # Dependencies in case the output wheel file is used as a library dependency.
        # For defining dependencies, when this package is used in Databricks, see:
        # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
        "setuptools"
    ],
)
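Side note on the version scheme in the setup.py above (an editorial illustration, not part of this commit): the `+<timestamp>` suffix is a PEP 440 local version identifier, so every build produces a distinct version even while `__version__` stays at 0.0.1, which is why re-uploaded wheels are picked up on all-purpose clusters. A minimal sketch of the resulting string, assuming the base version 0.0.1 from `__init__.py`:

```
import datetime

# Mirrors the scheme in setup.py: "<base>+<UTC timestamp>" (PEP 440 local version).
base_version = "0.0.1"  # value of my_default_python.__version__ in this template
local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")
print(base_version + "+" + local_version)  # e.g. 0.0.1+20250218.141530
```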
@@ -0,0 +1,90 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "[UUID]",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "# DLT pipeline\n",
    "\n",
    "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "[UUID]",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "# Import DLT and src/my_default_python\n",
    "import dlt\n",
    "import sys\n",
    "\n",
    "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n",
    "from pyspark.sql.functions import expr\n",
    "from my_default_python import main"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "[UUID]",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "@dlt.view\n",
    "def taxi_raw():\n",
    "    return main.get_taxis(spark)\n",
    "\n",
    "\n",
    "@dlt.table\n",
    "def filtered_taxis():\n",
    "    return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
   "notebookName": "dlt_pipeline",
   "widgets": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
@@ -0,0 +1 @@
__version__ = "0.0.1"
@@ -0,0 +1,25 @@
from pyspark.sql import SparkSession, DataFrame


def get_taxis(spark: SparkSession) -> DataFrame:
    return spark.read.table("samples.nyctaxi.trips")


# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
    try:
        from databricks.connect import DatabricksSession

        return DatabricksSession.builder.getOrCreate()
    except ImportError:
        return SparkSession.builder.getOrCreate()


def main():
    get_taxis(get_spark()).show(5)


if __name__ == "__main__":
    main()
@@ -0,0 +1,75 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {},
     "inputWidgets": {},
     "nuid": "[UUID]",
     "showTitle": false,
     "title": ""
    }
   },
   "source": [
    "# Default notebook\n",
    "\n",
    "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 0,
   "metadata": {
    "application/vnd.databricks.v1+cell": {
     "cellMetadata": {
      "byteLimit": 2048000,
      "rowLimit": 10000
     },
     "inputWidgets": {},
     "nuid": "[UUID]",
     "showTitle": false,
     "title": ""
    }
   },
   "outputs": [],
   "source": [
    "from my_default_python import main\n",
    "\n",
    "main.get_taxis(spark).show(10)"
   ]
  }
 ],
 "metadata": {
  "application/vnd.databricks.v1+notebook": {
   "dashboards": [],
   "language": "python",
   "notebookMetadata": {
    "pythonIndentUnit": 2
   },
   "notebookName": "notebook",
   "widgets": {}
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
@@ -0,0 +1,6 @@
from my_default_python.main import get_taxis, get_spark


def test_main():
    taxis = get_taxis(get_spark())
    assert taxis.count() > 5
@@ -11,5 +11,3 @@ cd ../../

# Calculate the difference from the serverless template
diff.py $TESTDIR/../serverless/output output/ > out.compare-vs-serverless.diff

rm -fr output