acc: Include full output for default-python/classic (#2391)

## Tests
Include full output of default-python/classic so it can be used as a
base for diffs in cloud tests #2383
Denis Bilenko 2025-02-27 11:16:06 +01:00 committed by GitHub
parent 81606cfcbc
commit 13ac52391d
20 changed files with 533 additions and 2 deletions

View File

@ -0,0 +1,3 @@
# Typings for Pylance in Visual Studio Code
# see https://github.com/microsoft/pyright/blob/main/docs/builtins.md
from databricks.sdk.runtime import *
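
Because this stub simply re-exports `databricks.sdk.runtime`, Pylance can resolve the globals (`spark`, `dbutils`, `display`, ...) that only exist at runtime on a Databricks cluster. A minimal sketch of the kind of code this gives completion for (the volume path is hypothetical, and on a cluster these names are available without the import):
```
from databricks.sdk.runtime import dbutils, spark

# List the files in a (hypothetical) Unity Catalog volume and show them as a table.
files = dbutils.fs.ls("/Volumes/main/default/my_volume")
spark.createDataFrame([(f.name, f.size) for f in files], "name STRING, size LONG").show()
```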

View File

@ -0,0 +1,7 @@
{
  "recommendations": [
    "databricks.databricks",
    "ms-python.vscode-pylance",
    "redhat.vscode-yaml"
  ]
}

View File

@ -0,0 +1,16 @@
{
  "python.analysis.stubPath": ".vscode",
  "jupyter.interactiveWindow.cellMarker.codeRegex": "^# COMMAND ----------|^# Databricks notebook source|^(#\\s*%%|#\\s*\\<codecell\\>|#\\s*In\\[\\d*?\\]|#\\s*In\\[ \\])",
  "jupyter.interactiveWindow.cellMarker.default": "# COMMAND ----------",
  "python.testing.pytestArgs": [
    "."
  ],
  "python.testing.unittestEnabled": false,
  "python.testing.pytestEnabled": true,
  "python.analysis.extraPaths": ["src"],
  "files.exclude": {
    "**/*.egg-info": true,
    "**/__pycache__": true,
    ".pytest_cache": true,
  },
}
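
The two `cellMarker` settings let VS Code's interactive window treat Databricks-style separators in plain `.py` files as cell boundaries. A hypothetical scratch script that these settings turn into runnable cells:
```
# Databricks notebook source
# Hypothetical scratch/exploration.py: each "# COMMAND ----------" below starts
# a new interactive-window cell under the settings above.

# COMMAND ----------
from my_default_python.main import get_spark, get_taxis

# COMMAND ----------
get_taxis(get_spark()).show(5)
```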

View File

@ -0,0 +1,49 @@
# my_default_python
The 'my_default_python' project was generated by using the default-python template.
## Getting started
1. Install the Databricks CLI from https://docs.databricks.com/dev-tools/cli/databricks-cli.html
2. Authenticate to your Databricks workspace, if you have not done so already:
```
$ databricks configure
```
3. To deploy a development copy of this project, type:
```
$ databricks bundle deploy --target dev
```
(Note that "dev" is the default target, so the `--target` parameter
is optional here.)
This deploys everything that's defined for this project.
For example, the default template would deploy a job called
`[dev yourname] my_default_python_job` to your workspace.
You can find that job by opening your workspace and clicking on **Workflows**.
4. Similarly, to deploy a production copy, type:
```
$ databricks bundle deploy --target prod
```
Note that the default job from the template has a schedule that runs every day
(defined in resources/my_default_python.job.yml). The schedule
is paused when deploying in development mode (see
https://docs.databricks.com/dev-tools/bundles/deployment-modes.html).
5. To run a job or pipeline, use the "run" command:
```
$ databricks bundle run
```
6. Optionally, install the Databricks extension for Visual Studio Code for local development from
https://docs.databricks.com/dev-tools/vscode-ext.html. It can configure your
virtual environment and set up Databricks Connect for running unit tests locally.
When not using these tools, consult your development environment's documentation
and/or the documentation for Databricks Connect for manually setting up your environment
(https://docs.databricks.com/en/dev-tools/databricks-connect/python/index.html).
7. For documentation on the Databricks asset bundles format used
for this project, and for CI/CD configuration, see
https://docs.databricks.com/dev-tools/bundles/index.html.
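
As an aside to step 3: the deployed `[dev yourname] my_default_python_job` can also be located from Python with the Databricks SDK rather than the Workflows UI. This is illustrative only and not part of the template; it assumes the authentication set up in step 2:
```
from databricks.sdk import WorkspaceClient

w = WorkspaceClient()  # picks up the profile created by `databricks configure`
for job in w.jobs.list():
    if job.settings and "my_default_python_job" in (job.settings.name or ""):
        print(job.job_id, job.settings.name)
```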

View File

@ -0,0 +1,29 @@
# This is a Databricks asset bundle definition for my_default_python.
# See https://docs.databricks.com/dev-tools/bundles/index.html for documentation.
bundle:
  name: my_default_python
  uuid: [UUID]

include:
  - resources/*.yml

targets:
  dev:
    # The default target uses 'mode: development' to create a development copy.
    # - Deployed resources get prefixed with '[dev my_user_name]'
    # - Any job schedules and triggers are paused by default.
    # See also https://docs.databricks.com/dev-tools/bundles/deployment-modes.html.
    mode: development
    default: true
    workspace:
      host: [DATABRICKS_URL]

  prod:
    mode: production
    workspace:
      host: [DATABRICKS_URL]
      # We explicitly deploy to /Workspace/Users/[USERNAME] to make sure we only have a single copy.
      root_path: /Workspace/Users/[USERNAME]/.bundle/${bundle.name}/${bundle.target}
    permissions:
      - user_name: [USERNAME]
        level: CAN_MANAGE

View File

@ -0,0 +1,22 @@
# Fixtures
This folder is reserved for fixtures, such as CSV files.
Below is an example of how to load fixtures as a data frame:
```
import pandas as pd
import os


def get_absolute_path(*relative_parts):
    if 'dbutils' in globals():
        base_dir = os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) # type: ignore
        path = os.path.normpath(os.path.join(base_dir, *relative_parts))
        return path if path.startswith("/Workspace") else "/Workspace" + path
    else:
        return os.path.join(*relative_parts)


csv_file = get_absolute_path("..", "fixtures", "mycsv.csv")
df = pd.read_csv(csv_file)
display(df)
```
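
A possible companion unit test for such a fixture (a sketch, assuming you have added `fixtures/mycsv.csv`; the path is resolved relative to the test file itself):
```
# tests/test_fixtures.py (hypothetical)
import os

import pandas as pd


def test_mycsv_fixture():
    csv_file = os.path.join(os.path.dirname(__file__), "..", "fixtures", "mycsv.csv")
    df = pd.read_csv(csv_file)
    assert len(df) > 0
```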

View File

@ -0,0 +1,8 @@
.databricks/
build/
dist/
__pycache__/
*.egg-info
.venv/
scratch/**
!scratch/README.md

View File

@ -0,0 +1,3 @@
[pytest]
testpaths = tests
pythonpath = src

View File

@ -0,0 +1,29 @@
## requirements-dev.txt: dependencies for local development.
##
## For defining dependencies used by jobs in Databricks Workflows, see
## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
## Add code completion support for DLT
databricks-dlt
## pytest is the default package used for testing
pytest
## Dependencies for building wheel files
setuptools
wheel
## databricks-connect can be used to run parts of this project locally.
## See https://docs.databricks.com/dev-tools/databricks-connect.html.
##
## databricks-connect is automatically installed if you're using the Databricks
## extension for Visual Studio Code
## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html).
##
## To manually install databricks-connect, either follow the instructions
## at https://docs.databricks.com/dev-tools/databricks-connect.html
## to install the package system-wide, or uncomment the line below to install a
## version of databricks-connect that corresponds to the Databricks Runtime version
## used for this project.
#
# databricks-connect>=15.4,<15.5
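
Once databricks-connect is installed (by the VS Code extension or by uncommenting the line above), a quick smoke test of the local setup could look like the sketch below; it assumes a default authentication profile is already configured:
```
from databricks.connect import DatabricksSession

# Runs a trivial query on the compute configured for the current profile.
spark = DatabricksSession.builder.getOrCreate()
print(spark.range(3).collect())  # expected: [Row(id=0), Row(id=1), Row(id=2)]
```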

View File

@ -0,0 +1,50 @@
# The main job for my_default_python.
resources:
  jobs:
    my_default_python_job:
      name: my_default_python_job

      trigger:
        # Run this job every day, exactly one day from the last run; see https://docs.databricks.com/api/workspace/jobs/create#trigger
        periodic:
          interval: 1
          unit: DAYS

      email_notifications:
        on_failure:
          - [USERNAME]

      tasks:
        - task_key: notebook_task
          job_cluster_key: job_cluster
          notebook_task:
            notebook_path: ../src/notebook.ipynb

        - task_key: refresh_pipeline
          depends_on:
            - task_key: notebook_task
          pipeline_task:
            pipeline_id: ${resources.pipelines.my_default_python_pipeline.id}

        - task_key: main_task
          depends_on:
            - task_key: refresh_pipeline
          job_cluster_key: job_cluster
          python_wheel_task:
            package_name: my_default_python
            entry_point: main
          libraries:
            # By default we just include the .whl file generated for the my_default_python package.
            # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
            # for more information on how to add other libraries.
            - whl: ../dist/*.whl

      job_clusters:
        - job_cluster_key: job_cluster
          new_cluster:
            spark_version: 15.4.x-scala2.12
            node_type_id: i3.xlarge
            data_security_mode: SINGLE_USER
            autoscale:
              min_workers: 1
              max_workers: 4

View File

@ -0,0 +1,14 @@
# The main pipeline for my_default_python
resources:
  pipelines:
    my_default_python_pipeline:
      name: my_default_python_pipeline
      ## Specify the 'catalog' field to configure this pipeline to make use of Unity Catalog:
      # catalog: catalog_name
      target: my_default_python_${bundle.target}
      libraries:
        - notebook:
            path: ../src/dlt_pipeline.ipynb
      configuration:
        bundle.sourcePath: ${workspace.file_path}/src

View File

@ -0,0 +1,4 @@
# scratch
This folder is reserved for personal, exploratory notebooks.
By default these are not committed to Git, as 'scratch' is listed in .gitignore.

View File

@ -0,0 +1,61 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {},
      "outputs": [],
      "source": [
        "%load_ext autoreload\n",
        "%autoreload 2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {
            "byteLimit": 2048000,
            "rowLimit": 10000
          },
          "inputWidgets": {},
          "nuid": "[UUID]",
          "showTitle": false,
          "title": ""
        }
      },
      "outputs": [],
      "source": [
        "import sys\n",
        "\n",
        "sys.path.append(\"../src\")\n",
        "from my_default_python import main\n",
        "\n",
        "main.get_taxis(spark).show(10)"
      ]
    }
  ],
  "metadata": {
    "application/vnd.databricks.v1+notebook": {
      "dashboards": [],
      "language": "python",
      "notebookMetadata": {
        "pythonIndentUnit": 2
      },
      "notebookName": "ipynb-notebook",
      "widgets": {}
    },
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.11.4"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}

View File

@ -0,0 +1,41 @@
"""
setup.py configuration script describing how to build and package this project.
This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the my_default_python project.
"""
from setuptools import setup, find_packages
import sys
sys.path.append("./src")
import datetime
import my_default_python
local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")
setup(
name="my_default_python",
# We use timestamp as Local version identifier (https://peps.python.org/pep-0440/#local-version-identifiers.)
# to ensure that changes to wheel package are picked up when used on all-purpose clusters
version=my_default_python.__version__ + "+" + local_version,
url="https://databricks.com",
author="[USERNAME]",
description="wheel file based on my_default_python/src",
packages=find_packages(where="./src"),
package_dir={"": "src"},
entry_points={
"packages": [
"main=my_default_python.main:main",
],
},
install_requires=[
# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
"setuptools"
],
)
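
For illustration, the local version identifier built above yields a PEP 440 version such as `0.0.1+20250227.101606` (the `0.0.1` base is an assumed value of `my_default_python.__version__`), so every build gets a distinct version and all-purpose clusters pick up the new wheel instead of a cached one:
```
import datetime

__version__ = "0.0.1"  # assumed value of my_default_python.__version__
local_version = datetime.datetime.utcnow().strftime("%Y%m%d.%H%M%S")
print(__version__ + "+" + local_version)  # e.g. 0.0.1+20250227.101606
```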

View File

@ -0,0 +1,90 @@
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {},
          "inputWidgets": {},
          "nuid": "[UUID]",
          "showTitle": false,
          "title": ""
        }
      },
      "source": [
        "# DLT pipeline\n",
        "\n",
        "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/my_default_python.pipeline.yml."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {},
          "inputWidgets": {},
          "nuid": "[UUID]",
          "showTitle": false,
          "title": ""
        }
      },
      "outputs": [],
      "source": [
        "# Import DLT and src/my_default_python\n",
        "import dlt\n",
        "import sys\n",
        "\n",
        "sys.path.append(spark.conf.get(\"bundle.sourcePath\", \".\"))\n",
        "from pyspark.sql.functions import expr\n",
        "from my_default_python import main"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {},
          "inputWidgets": {},
          "nuid": "[UUID]",
          "showTitle": false,
          "title": ""
        }
      },
      "outputs": [],
      "source": [
        "@dlt.view\n",
        "def taxi_raw():\n",
        "    return main.get_taxis(spark)\n",
        "\n",
        "\n",
        "@dlt.table\n",
        "def filtered_taxis():\n",
        "    return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))"
      ]
    }
  ],
  "metadata": {
    "application/vnd.databricks.v1+notebook": {
      "dashboards": [],
      "language": "python",
      "notebookMetadata": {
        "pythonIndentUnit": 2
      },
      "notebookName": "dlt_pipeline",
      "widgets": {}
    },
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.11.4"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}

View File

@ -0,0 +1,25 @@
from pyspark.sql import SparkSession, DataFrame


def get_taxis(spark: SparkSession) -> DataFrame:
    return spark.read.table("samples.nyctaxi.trips")


# Create a new Databricks Connect session. If this fails,
# check that you have configured Databricks Connect correctly.
# See https://docs.databricks.com/dev-tools/databricks-connect.html.
def get_spark() -> SparkSession:
    try:
        from databricks.connect import DatabricksSession

        return DatabricksSession.builder.getOrCreate()
    except ImportError:
        return SparkSession.builder.getOrCreate()


def main():
    get_taxis(get_spark()).show(5)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,75 @@
{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {},
          "inputWidgets": {},
          "nuid": "[UUID]",
          "showTitle": false,
          "title": ""
        }
      },
      "source": [
        "# Default notebook\n",
        "\n",
        "This default notebook is executed using Databricks Workflows as defined in resources/my_default_python.job.yml."
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {},
      "outputs": [],
      "source": [
        "%load_ext autoreload\n",
        "%autoreload 2"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 0,
      "metadata": {
        "application/vnd.databricks.v1+cell": {
          "cellMetadata": {
            "byteLimit": 2048000,
            "rowLimit": 10000
          },
          "inputWidgets": {},
          "nuid": "[UUID]",
          "showTitle": false,
          "title": ""
        }
      },
      "outputs": [],
      "source": [
        "from my_default_python import main\n",
        "\n",
        "main.get_taxis(spark).show(10)"
      ]
    }
  ],
  "metadata": {
    "application/vnd.databricks.v1+notebook": {
      "dashboards": [],
      "language": "python",
      "notebookMetadata": {
        "pythonIndentUnit": 2
      },
      "notebookName": "notebook",
      "widgets": {}
    },
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.11.4"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}

View File

@ -0,0 +1,6 @@
from my_default_python.main import get_taxis, get_spark


def test_main():
    taxis = get_taxis(get_spark())
    assert taxis.count() > 5

View File

@ -11,5 +11,3 @@ cd ../../
# Calculate the difference from the serverless template
diff.py $TESTDIR/../serverless/output output/ > out.compare-vs-serverless.diff
rm -fr output