Make the default `databricks bundle init` template more self-explanatory (#796)

This makes the default-python template more self-explanatory and adds a
few other tweaks for a better out-of-the-box experience.
Author: Lennart Kats (databricks), 2023-09-26 11:12:34 +02:00 (committed by GitHub)
Parent: 757d5efe8d
Commit: 0c1516c4ba
8 changed files with 72 additions and 11 deletions

@@ -41,6 +41,7 @@ func assertBuiltinTemplateValid(t *testing.T, settings map[string]any, target st
 	templatePath, err := prepareBuiltinTemplates("default-python", tempDir)
 	require.NoError(t, err)
+	libraryPath := filepath.Join(templatePath, "library")

 	w := &databricks.WorkspaceClient{
 		Config: &workspaceConfig.Config{Host: "https://myhost.com"},
@@ -52,7 +53,7 @@ func assertBuiltinTemplateValid(t *testing.T, settings map[string]any, target st
 	ctx = root.SetWorkspaceClient(ctx, w)
 	helpers := loadHelpers(ctx)
-	renderer, err := newRenderer(ctx, settings, helpers, templatePath, "./testdata/template-in-path/library", tempDir)
+	renderer, err := newRenderer(ctx, settings, helpers, templatePath, libraryPath, tempDir)
 	require.NoError(t, err)

 	// Evaluate template

@@ -0,0 +1,7 @@
+{{define "latest_lts_dbr_version" -}}
+    13.3.x-scala2.12
+{{- end}}
+
+{{define "latest_lts_db_connect_version_spec" -}}
+    >=13.3,<13.4
+{{- end}}
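These named definitions live in the template's library directory, which is what the test change above now wires in as libraryPath. Other template files reference them with {{template "latest_lts_dbr_version"}} and {{template "latest_lts_db_connect_version_spec"}}; see the requirements-dev.txt and job definition hunks below.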

@@ -8,7 +8,10 @@
     ],
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true,
+    "python.analysis.extraPaths": ["src"],
     "files.exclude": {
-        "**/*.egg-info": true
+        "**/*.egg-info": true,
+        "**/__pycache__": true,
+        ".pytest_cache": true,
     },
 }

@@ -30,7 +30,7 @@ The '{{.project_name}}' project was generated by using the default-python templa
 5. To run a job or pipeline, use the "run" comand:
    ```
-   $ databricks bundle run {{.project_name}}_job
+   $ databricks bundle run
    ```

 6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from

@@ -0,0 +1,22 @@
+## requirements-dev.txt: dependencies for local development.
+##
+## For defining dependencies used by jobs in Databricks Workflows, see
+## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+
+## pytest is the default package used for testing
+pytest
+
+## databricks-connect can be used to run parts of this project locally.
+## See https://docs.databricks.com/dev-tools/databricks-connect.html.
+##
+## databricks-connect is automatically installed if you're using Databricks
+## extension for Visual Studio Code
+## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html).
+##
+## To manually install databricks-connect, either follow the instructions
+## at https://docs.databricks.com/dev-tools/databricks-connect.html
+## to install the package system-wide. Or uncomment the line below to install a
+## version of db-connect that corresponds to the Databricks Runtime version used
+## for this project.
+#
+# databricks-connect{{template "latest_lts_db_connect_version_spec"}}
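When a user uncomments that last line, the {{template "latest_lts_db_connect_version_spec"}} reference expands to the version spec defined in the shared library file above, so the generated requirements-dev.txt ends with a commented-out pin equivalent to databricks-connect>=13.3,<13.4.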

@@ -49,6 +49,9 @@ resources:
             package_name: {{.project_name}}
             entry_point: main
           libraries:
+            # By default we just include the .whl file generated for the {{.project_name}} package.
+            # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+            # for more information on how to add other libraries.
             - whl: ../dist/*.whl
 {{else}}
@@ -56,8 +59,7 @@ resources:
       job_clusters:
         - job_cluster_key: job_cluster
           new_cluster:
-            {{- /* we should always use an LTS version in our templates */}}
-            spark_version: 13.3.x-scala2.12
+            spark_version: {{template "latest_lts_dbr_version"}}
             node_type_id: {{smallest_node_type}}
             autoscale:
               min_workers: 1
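The rendered bundle is unchanged by this: {{template "latest_lts_dbr_version"}} expands to 13.3.x-scala2.12, the same LTS runtime that was previously hard-coded, but the version now lives in a single shared definition.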

@@ -1,8 +1,9 @@
 """
-Setup script for {{.project_name}}.
-
-This script packages and distributes the associated wheel file(s).
-Source code is in ./src/. Run 'python setup.py sdist bdist_wheel' to build.
+setup.py configuration script describing how to build and package this project.
+
+This file is primarily used by the setuptools library and typically should not
+be executed directly. See README.md for how to deploy, test, and run
+the {{.project_name}} project.
 """
 from setuptools import setup, find_packages
@@ -16,9 +17,18 @@ setup(
     version={{.project_name}}.__version__,
     url="https://databricks.com",
     author="{{user_name}}",
-    description="my test wheel",
+    description="wheel file based on {{.project_name}}/src",
     packages=find_packages(where='./src'),
     package_dir={'': 'src'},
-    entry_points={"entry_points": "main={{.project_name}}.main:main"},
-    install_requires=["setuptools"],
+    entry_points={
+        "packages": [
+            "main={{.project_name}}.main:main"
+        ]
+    },
+    install_requires=[
+        # Dependencies in case the output wheel file is used as a library dependency.
+        # For defining dependencies, when this package is used in Databricks, see:
+        # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+        "setuptools"
+    ],
 )
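The main={{.project_name}}.main:main entry point declared here is what the job's python_wheel_task resolves through its package_name and entry_point: main settings (see the job definition hunk above) once the wheel built from ../dist/*.whl is installed on the cluster.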

@@ -1,5 +1,21 @@
+from databricks.connect import DatabricksSession
+from pyspark.sql import SparkSession
 from {{.project_name}} import main

+# Create a new Databricks Connect session. If this fails,
+# check that you have configured Databricks Connect correctly.
+# See https://docs.databricks.com/dev-tools/databricks-connect.html.
+{{/*
+The below works around a problematic error message from Databricks Connect.
+The standard SparkSession is supported in all configurations (workspace, IDE,
+all runtime versions, CLI). But on the CLI it currently gives a confusing
+error message if SPARK_REMOTE is not set. We can't directly use
+DatabricksSession.builder in main.py, so we're re-assigning it here so
+everything works out of the box, even for CLI users who don't set SPARK_REMOTE.
+*/}}
+SparkSession.builder = DatabricksSession.builder
+SparkSession.builder.getOrCreate()
+
 def test_main():
     taxis = main.get_taxis()
     assert taxis.count() > 5
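
For context, here is a minimal, hypothetical sketch of the kind of project code this test exercises; the template's actual src/{{.project_name}}/main.py is not part of this diff, so the function body and table name below are assumptions. The point of the builder reassignment above is that project code can keep using the plain SparkSession builder: on a Databricks cluster it gets the regular Spark session, while locally the same call now goes through Databricks Connect.

    # Hypothetical example only: the real main.py is not shown in this commit.
    from pyspark.sql import SparkSession


    def get_taxis():
        # Plain SparkSession usage; the test above re-points SparkSession.builder
        # at DatabricksSession.builder, so this also works locally via Databricks Connect.
        spark = SparkSession.builder.getOrCreate()
        return spark.read.table("samples.nyctaxi.trips")  # illustrative table name


    def main():
        get_taxis().show(5)


    if __name__ == "__main__":
        main()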