diff --git a/libs/template/renderer_test.go b/libs/template/renderer_test.go
index 8f8a8291d..070fc5d25 100644
--- a/libs/template/renderer_test.go
+++ b/libs/template/renderer_test.go
@@ -41,6 +41,7 @@ func assertBuiltinTemplateValid(t *testing.T, settings map[string]any, target st
 	templatePath, err := prepareBuiltinTemplates("default-python", tempDir)
 	require.NoError(t, err)
+	libraryPath := filepath.Join(templatePath, "library")
 
 	w := &databricks.WorkspaceClient{
 		Config: &workspaceConfig.Config{Host: "https://myhost.com"},
 	}
@@ -52,7 +53,7 @@ func assertBuiltinTemplateValid(t *testing.T, settings map[string]any, target st
 	ctx = root.SetWorkspaceClient(ctx, w)
 	helpers := loadHelpers(ctx)
 
-	renderer, err := newRenderer(ctx, settings, helpers, templatePath, "./testdata/template-in-path/library", tempDir)
+	renderer, err := newRenderer(ctx, settings, helpers, templatePath, libraryPath, tempDir)
 	require.NoError(t, err)
 
 	// Evaluate template
diff --git a/libs/template/templates/default-python/library/versions.tmpl b/libs/template/templates/default-python/library/versions.tmpl
new file mode 100644
index 000000000..f9a879d25
--- /dev/null
+++ b/libs/template/templates/default-python/library/versions.tmpl
@@ -0,0 +1,7 @@
+{{define "latest_lts_dbr_version" -}}
+    13.3.x-scala2.12
+{{- end}}
+
+{{define "latest_lts_db_connect_version_spec" -}}
+    >=13.3,<13.4
+{{- end}}
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json
index 16cb2c96a..f19498daa 100644
--- a/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json
+++ b/libs/template/templates/default-python/template/{{.project_name}}/.vscode/settings.json
@@ -8,7 +8,10 @@
     ],
     "python.testing.unittestEnabled": false,
     "python.testing.pytestEnabled": true,
+    "python.analysis.extraPaths": ["src"],
     "files.exclude": {
-        "**/*.egg-info": true
+        "**/*.egg-info": true,
+        "**/__pycache__": true,
+        ".pytest_cache": true,
     },
 }
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl
index 1bcd7af41..b451d03b1 100644
--- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl
@@ -30,7 +30,7 @@ The '{{.project_name}}' project was generated by using the default-python templa
 
 5. To run a job or pipeline, use the "run" command:
    ```
-   $ databricks bundle run {{.project_name}}_job
+   $ databricks bundle run
    ```
 
 6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/requirements-dev.txt.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/requirements-dev.txt.tmpl
new file mode 100644
index 000000000..2d4c0f64e
--- /dev/null
+++ b/libs/template/templates/default-python/template/{{.project_name}}/requirements-dev.txt.tmpl
@@ -0,0 +1,22 @@
+## requirements-dev.txt: dependencies for local development.
+##
+## For defining dependencies used by jobs in Databricks Workflows, see
+## https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+
+## pytest is the default package used for testing
+pytest
+
+## databricks-connect can be used to run parts of this project locally.
+## See https://docs.databricks.com/dev-tools/databricks-connect.html.
+##
+## databricks-connect is automatically installed if you're using the Databricks
+## extension for Visual Studio Code
+## (https://docs.databricks.com/dev-tools/vscode-ext/dev-tasks/databricks-connect.html).
+##
+## To manually install databricks-connect, either follow the instructions
+## at https://docs.databricks.com/dev-tools/databricks-connect.html
+## to install the package system-wide, or uncomment the line below to install a
+## version of db-connect that corresponds to the Databricks Runtime version used
+## for this project.
+#
+# databricks-connect{{template "latest_lts_db_connect_version_spec"}}
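Reviewer note: to sanity-check the new dev requirements locally, the sketch below (not part of this PR) shows a minimal smoke test after running `pip install -r requirements-dev.txt`, assuming Databricks Connect is already configured (for example via `DATABRICKS_HOST`/`DATABRICKS_TOKEN`/`DATABRICKS_CLUSTER_ID` or a CLI profile — the template itself does not provide that configuration).

```python
# smoke_test.py: a minimal sketch (not part of this PR) to verify the dev
# dependencies from requirements-dev.txt. Assumes Databricks Connect is
# already configured in the environment.
from databricks.connect import DatabricksSession

spark = DatabricksSession.builder.getOrCreate()
# A trivial remote query: builds a 3-row DataFrame on the cluster and counts it.
print(spark.range(3).count())  # expected output: 3
```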
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl
index 1792f9479..23bdee492 100644
--- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl
@@ -49,6 +49,9 @@ resources:
               package_name: {{.project_name}}
               entry_point: main
             libraries:
+              # By default we just include the .whl file generated for the {{.project_name}} package.
+              # See https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+              # for more information on how to add other libraries.
               - whl: ../dist/*.whl
 
 {{else}}
@@ -56,8 +59,7 @@ resources:
       job_clusters:
         - job_cluster_key: job_cluster
           new_cluster:
-            {{- /* we should always use an LTS version in our templates */}}
-            spark_version: 13.3.x-scala2.12
+            spark_version: {{template "latest_lts_dbr_version"}}
             node_type_id: {{smallest_node_type}}
             autoscale:
                 min_workers: 1
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl
index efd598820..4eb6b8f91 100644
--- a/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/setup.py.tmpl
@@ -1,8 +1,9 @@
 """
-Setup script for {{.project_name}}.
+setup.py configuration script describing how to build and package this project.
 
-This script packages and distributes the associated wheel file(s).
-Source code is in ./src/. Run 'python setup.py sdist bdist_wheel' to build.
+This file is primarily used by the setuptools library and typically should not
+be executed directly. See README.md for how to deploy, test, and run
+the {{.project_name}} project.
 """
 
 from setuptools import setup, find_packages
@@ -16,9 +17,18 @@ setup(
     version={{.project_name}}.__version__,
     url="https://databricks.com",
     author="{{user_name}}",
-    description="my test wheel",
+    description="wheel file based on {{.project_name}}/src",
     packages=find_packages(where='./src'),
     package_dir={'': 'src'},
-    entry_points={"entry_points": "main={{.project_name}}.main:main"},
-    install_requires=["setuptools"],
+    entry_points={
+        "packages": [
+            "main={{.project_name}}.main:main"
+        ]
+    },
+    install_requires=[
+        # Dependencies in case the output wheel file is used as a library dependency.
+        # For defining dependencies when this package is used in Databricks, see:
+        # https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
+        "setuptools"
+    ],
 )
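Reviewer note: the entry point above is declared under a custom group named "packages" rather than the standard `console_scripts` group, and the job's `python_wheel_task` references it by name (`entry_point: main`). As a hedged illustration only — using a hypothetical installed project named `my_project` and Python 3.10+ — this is roughly how such an entry point can be resolved once the wheel is installed:

```python
# resolve_entry_point.py: a sketch (not part of this PR) of looking up the
# "main" entry point declared in setup.py above. "my_project" is hypothetical.
from importlib.metadata import entry_points

# Select entry points in the custom "packages" group declared in setup.py.
for ep in entry_points(group="packages", name="main"):
    main_fn = ep.load()  # imports my_project.main and returns its main() function
    main_fn()            # conceptually what a python_wheel_task does with entry_point: main
```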
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl
index f1750046a..a7a6afe0a 100644
--- a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl
@@ -1,5 +1,21 @@
+from databricks.connect import DatabricksSession
+from pyspark.sql import SparkSession
 from {{.project_name}} import main
 
+# Create a new Databricks Connect session. If this fails,
+# check that you have configured Databricks Connect correctly.
+# See https://docs.databricks.com/dev-tools/databricks-connect.html.
+{{/*
+  The code below works around a problematic error message from Databricks Connect.
+  The standard SparkSession is supported in all configurations (workspace, IDE,
+  all runtime versions, CLI), but on the CLI it currently gives a confusing
+  error message if SPARK_REMOTE is not set. We can't directly use
+  DatabricksSession.builder in main.py, so we re-assign it here so that
+  everything works out of the box, even for CLI users who don't set SPARK_REMOTE.
+*/}}
+SparkSession.builder = DatabricksSession.builder
+SparkSession.builder.getOrCreate()
+
 def test_main():
     taxis = main.get_taxis()
     assert taxis.count() > 5
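Reviewer note: `main.py` itself is not touched by this diff, so the following is only an assumed sketch of its shape, inferred from the test above. Because the test re-assigns `SparkSession.builder`, code under `src/` can keep using the vanilla `SparkSession` API and still get a Databricks Connect session when run locally:

```python
# src/<project_name>/main.py: an assumed sketch (this file is not in the diff).
# get_taxis() and the table name are assumptions based on the test above.
from pyspark.sql import SparkSession

def get_taxis():
    # Resolves to a Databricks Connect session in tests, because main_test.py
    # re-assigned SparkSession.builder to DatabricksSession.builder.
    spark = SparkSession.builder.getOrCreate()
    return spark.read.table("samples.nyctaxi.trips")

def main():
    get_taxis().show(5)

if __name__ == "__main__":
    main()
```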