From f9e521b43e1e19b5ae52ca1c512f6690204e8b2a Mon Sep 17 00:00:00 2001
From: "Lennart Kats (databricks)"
Date: Wed, 6 Sep 2023 11:52:31 +0200
Subject: [PATCH] databricks bundle init template v2: optional stubs, DLT support (#700)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Changes

This follows up on https://github.com/databricks/cli/pull/686. This PR makes
our stubs optional + it adds DLT stubs:

```
$ databricks bundle init
Template to use [default-python]: default-python
Unique name for this project [my_project]: my_project
Include a stub (sample) notebook in 'my_project/src' [yes]: yes
Include a stub (sample) DLT pipeline in 'my_project/src' [yes]: yes
Include a stub (sample) Python package 'my_project/src' [yes]: yes
✨ Successfully initialized template
```

## Tests

Manual testing, matrix tests.

---------

Co-authored-by: Andrew Nester
Co-authored-by: PaulCornellDB
Co-authored-by: Pieter Noordhuis
---
 bundle/bundle.go | 4 +
 .../config/mutator/populate_current_user.go | 4 +
 bundle/deploy/terraform/apply.go | 4 +
 bundle/deploy/terraform/convert.go | 9 +-
 bundle/deploy/terraform/convert_test.go | 18 +--
 bundle/deploy/terraform/write.go | 3 +-
 libs/template/helpers.go | 25 ++--
 libs/template/renderer.go | 16 ++-
 libs/template/renderer_test.go | 96 +++++++++++++++
 .../databricks_template_schema.json | 27 ++++-
 .../templates/default-python/defaults.json | 5 +-
 .../default-python/template/__preamble.tmpl | 38 ++++++
 .../template/{{.project_name}}/README.md.tmpl | 15 ++-
 .../{{.project_name}}/resources/.gitkeep | 1 +
 .../resources/{{.project_name}}_job.yml.tmpl | 28 ++++-
 .../{{.project_name}}_pipeline.yml.tmpl | 12 ++
 ...ploration.ipynb => exploration.ipynb.tmpl} | 8 +-
 .../src/dlt_pipeline.ipynb.tmpl | 112 ++++++++++++++++++
 .../{{.project_name}}/src/notebook.ipynb.tmpl | 6 +-
 .../{{.project_name}}/tests/main_test.py.tmpl | 2 +-
 20 files changed, 393 insertions(+), 40 deletions(-)
 create mode 100644 libs/template/templates/default-python/template/__preamble.tmpl
 create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/.gitkeep
 create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl
 rename libs/template/templates/default-python/template/{{.project_name}}/scratch/{exploration.ipynb => exploration.ipynb.tmpl} (84%)
 create mode 100644 libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl

diff --git a/bundle/bundle.go b/bundle/bundle.go
index d69d5815..8175ce28 100644
--- a/bundle/bundle.go
+++ b/bundle/bundle.go
@@ -37,6 +37,10 @@ type Bundle struct {
 	// Stores an initialized copy of this bundle's Terraform wrapper.
 	Terraform *tfexec.Terraform
 
+	// Indicates that the Terraform definition based on this bundle is empty,
+	// i.e. that it would deploy no resources.
+	TerraformHasNoResources bool
+
 	// Stores the locker responsible for acquiring/releasing a deployment lock.
Locker *locker.Locker diff --git a/bundle/config/mutator/populate_current_user.go b/bundle/config/mutator/populate_current_user.go index cbaa2d30..bba0457c 100644 --- a/bundle/config/mutator/populate_current_user.go +++ b/bundle/config/mutator/populate_current_user.go @@ -21,6 +21,10 @@ func (m *populateCurrentUser) Name() string { } func (m *populateCurrentUser) Apply(ctx context.Context, b *bundle.Bundle) error { + if b.Config.Workspace.CurrentUser != nil { + return nil + } + w := b.WorkspaceClient() me, err := w.CurrentUser.Me(ctx) if err != nil { diff --git a/bundle/deploy/terraform/apply.go b/bundle/deploy/terraform/apply.go index ab868f76..53cffbba 100644 --- a/bundle/deploy/terraform/apply.go +++ b/bundle/deploy/terraform/apply.go @@ -16,6 +16,10 @@ func (w *apply) Name() string { } func (w *apply) Apply(ctx context.Context, b *bundle.Bundle) error { + if b.TerraformHasNoResources { + cmdio.LogString(ctx, "Note: there are no resources to deploy for this bundle") + return nil + } tf := b.Terraform if tf == nil { return fmt.Errorf("terraform not initialized") diff --git a/bundle/deploy/terraform/convert.go b/bundle/deploy/terraform/convert.go index ac68bd35..41bde91d 100644 --- a/bundle/deploy/terraform/convert.go +++ b/bundle/deploy/terraform/convert.go @@ -49,12 +49,14 @@ func convPermission(ac resources.Permission) schema.ResourcePermissionsAccessCon // // NOTE: THIS IS CURRENTLY A HACK. WE NEED A BETTER WAY TO // CONVERT TO/FROM TERRAFORM COMPATIBLE FORMAT. -func BundleToTerraform(config *config.Root) *schema.Root { +func BundleToTerraform(config *config.Root) (*schema.Root, bool) { tfroot := schema.NewRoot() tfroot.Provider = schema.NewProviders() tfroot.Resource = schema.NewResources() + noResources := true for k, src := range config.Resources.Jobs { + noResources = false var dst schema.ResourceJob conv(src, &dst) @@ -100,6 +102,7 @@ func BundleToTerraform(config *config.Root) *schema.Root { } for k, src := range config.Resources.Pipelines { + noResources = false var dst schema.ResourcePipeline conv(src, &dst) @@ -127,6 +130,7 @@ func BundleToTerraform(config *config.Root) *schema.Root { } for k, src := range config.Resources.Models { + noResources = false var dst schema.ResourceMlflowModel conv(src, &dst) tfroot.Resource.MlflowModel[k] = &dst @@ -139,6 +143,7 @@ func BundleToTerraform(config *config.Root) *schema.Root { } for k, src := range config.Resources.Experiments { + noResources = false var dst schema.ResourceMlflowExperiment conv(src, &dst) tfroot.Resource.MlflowExperiment[k] = &dst @@ -150,7 +155,7 @@ func BundleToTerraform(config *config.Root) *schema.Root { } } - return tfroot + return tfroot, noResources } func TerraformToBundle(state *tfjson.State, config *config.Root) error { diff --git a/bundle/deploy/terraform/convert_test.go b/bundle/deploy/terraform/convert_test.go index c47824ec..4d912fbe 100644 --- a/bundle/deploy/terraform/convert_test.go +++ b/bundle/deploy/terraform/convert_test.go @@ -40,7 +40,7 @@ func TestConvertJob(t *testing.T) { }, } - out := BundleToTerraform(&config) + out, _ := BundleToTerraform(&config) assert.Equal(t, "my job", out.Resource.Job["my_job"].Name) assert.Len(t, out.Resource.Job["my_job"].JobCluster, 1) assert.Equal(t, "https://github.com/foo/bar", out.Resource.Job["my_job"].GitSource.Url) @@ -65,7 +65,7 @@ func TestConvertJobPermissions(t *testing.T) { }, } - out := BundleToTerraform(&config) + out, _ := BundleToTerraform(&config) assert.NotEmpty(t, out.Resource.Permissions["job_my_job"].JobId) assert.Len(t, 
out.Resource.Permissions["job_my_job"].AccessControl, 1) @@ -101,7 +101,7 @@ func TestConvertJobTaskLibraries(t *testing.T) { }, } - out := BundleToTerraform(&config) + out, _ := BundleToTerraform(&config) assert.Equal(t, "my job", out.Resource.Job["my_job"].Name) require.Len(t, out.Resource.Job["my_job"].Task, 1) require.Len(t, out.Resource.Job["my_job"].Task[0].Library, 1) @@ -135,7 +135,7 @@ func TestConvertPipeline(t *testing.T) { }, } - out := BundleToTerraform(&config) + out, _ := BundleToTerraform(&config) assert.Equal(t, "my pipeline", out.Resource.Pipeline["my_pipeline"].Name) assert.Len(t, out.Resource.Pipeline["my_pipeline"].Library, 2) assert.Nil(t, out.Data) @@ -159,7 +159,7 @@ func TestConvertPipelinePermissions(t *testing.T) { }, } - out := BundleToTerraform(&config) + out, _ := BundleToTerraform(&config) assert.NotEmpty(t, out.Resource.Permissions["pipeline_my_pipeline"].PipelineId) assert.Len(t, out.Resource.Permissions["pipeline_my_pipeline"].AccessControl, 1) @@ -194,7 +194,7 @@ func TestConvertModel(t *testing.T) { }, } - out := BundleToTerraform(&config) + out, _ := BundleToTerraform(&config) assert.Equal(t, "name", out.Resource.MlflowModel["my_model"].Name) assert.Equal(t, "description", out.Resource.MlflowModel["my_model"].Description) assert.Len(t, out.Resource.MlflowModel["my_model"].Tags, 2) @@ -223,7 +223,7 @@ func TestConvertModelPermissions(t *testing.T) { }, } - out := BundleToTerraform(&config) + out, _ := BundleToTerraform(&config) assert.NotEmpty(t, out.Resource.Permissions["mlflow_model_my_model"].RegisteredModelId) assert.Len(t, out.Resource.Permissions["mlflow_model_my_model"].AccessControl, 1) @@ -247,7 +247,7 @@ func TestConvertExperiment(t *testing.T) { }, } - out := BundleToTerraform(&config) + out, _ := BundleToTerraform(&config) assert.Equal(t, "name", out.Resource.MlflowExperiment["my_experiment"].Name) assert.Nil(t, out.Data) } @@ -270,7 +270,7 @@ func TestConvertExperimentPermissions(t *testing.T) { }, } - out := BundleToTerraform(&config) + out, _ := BundleToTerraform(&config) assert.NotEmpty(t, out.Resource.Permissions["mlflow_experiment_my_experiment"].ExperimentId) assert.Len(t, out.Resource.Permissions["mlflow_experiment_my_experiment"].AccessControl, 1) diff --git a/bundle/deploy/terraform/write.go b/bundle/deploy/terraform/write.go index b40a7053..0bf9ab24 100644 --- a/bundle/deploy/terraform/write.go +++ b/bundle/deploy/terraform/write.go @@ -21,7 +21,8 @@ func (w *write) Apply(ctx context.Context, b *bundle.Bundle) error { return err } - root := BundleToTerraform(&b.Config) + root, noResources := BundleToTerraform(&b.Config) + b.TerraformHasNoResources = noResources f, err := os.Create(filepath.Join(dir, "bundle.tf.json")) if err != nil { return err diff --git a/libs/template/helpers.go b/libs/template/helpers.go index 29abbe21..31752270 100644 --- a/libs/template/helpers.go +++ b/libs/template/helpers.go @@ -26,9 +26,10 @@ type pair struct { v any } +var cachedUser *iam.User +var cachedIsServicePrincipal *bool + func loadHelpers(ctx context.Context) template.FuncMap { - var user *iam.User - var is_service_principal *bool w := root.WorkspaceClient(ctx) return template.FuncMap{ "fail": func(format string, args ...any) (any, error) { @@ -80,32 +81,32 @@ func loadHelpers(ctx context.Context) template.FuncMap { return w.Config.Host, nil }, "user_name": func() (string, error) { - if user == nil { + if cachedUser == nil { var err error - user, err = w.CurrentUser.Me(ctx) + cachedUser, err = w.CurrentUser.Me(ctx) if err != nil { return "", err 
} } - result := user.UserName + result := cachedUser.UserName if result == "" { - result = user.Id + result = cachedUser.Id } return result, nil }, "is_service_principal": func() (bool, error) { - if is_service_principal != nil { - return *is_service_principal, nil + if cachedIsServicePrincipal != nil { + return *cachedIsServicePrincipal, nil } - if user == nil { + if cachedUser == nil { var err error - user, err = w.CurrentUser.Me(ctx) + cachedUser, err = w.CurrentUser.Me(ctx) if err != nil { return false, err } } - result := auth.IsServicePrincipal(user.Id) - is_service_principal = &result + result := auth.IsServicePrincipal(cachedUser.Id) + cachedIsServicePrincipal = &result return result, nil }, } diff --git a/libs/template/renderer.go b/libs/template/renderer.go index f4bd99d2..f674ea0f 100644 --- a/libs/template/renderer.go +++ b/libs/template/renderer.go @@ -9,6 +9,7 @@ import ( "path" "path/filepath" "slices" + "sort" "strings" "text/template" @@ -214,17 +215,22 @@ func (r *renderer) walk() error { // Add skip function, which accumulates skip patterns relative to current // directory r.baseTemplate.Funcs(template.FuncMap{ - "skip": func(relPattern string) string { + "skip": func(relPattern string) (string, error) { // patterns are specified relative to current directory of the file // the {{skip}} function is called from. - pattern := path.Join(currentDirectory, relPattern) + patternRaw := path.Join(currentDirectory, relPattern) + pattern, err := r.executeTemplate(patternRaw) + if err != nil { + return "", err + } + if !slices.Contains(r.skipPatterns, pattern) { logger.Infof(r.ctx, "adding skip pattern: %s", pattern) r.skipPatterns = append(r.skipPatterns, pattern) } // return empty string will print nothing at function call site // when executing the template - return "" + return "", nil }, }) @@ -239,6 +245,10 @@ func (r *renderer) walk() error { if err != nil { return err } + // Sort by name to ensure deterministic ordering + sort.Slice(entries, func(i, j int) bool { + return entries[i].Name() < entries[j].Name() + }) for _, entry := range entries { if entry.IsDir() { // Add to slice, for BFS traversal diff --git a/libs/template/renderer_test.go b/libs/template/renderer_test.go index a2e5675e..21dd1e4f 100644 --- a/libs/template/renderer_test.go +++ b/libs/template/renderer_test.go @@ -12,7 +12,14 @@ import ( "testing" "text/template" + "github.com/databricks/cli/bundle" + bundleConfig "github.com/databricks/cli/bundle/config" + "github.com/databricks/cli/bundle/config/mutator" + "github.com/databricks/cli/bundle/phases" "github.com/databricks/cli/cmd/root" + "github.com/databricks/databricks-sdk-go" + workspaceConfig "github.com/databricks/databricks-sdk-go/config" + "github.com/databricks/databricks-sdk-go/service/iam" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -29,6 +36,95 @@ func assertFilePermissions(t *testing.T, path string, perm fs.FileMode) { assert.Equal(t, perm, info.Mode().Perm()) } +func assertBuiltinTemplateValid(t *testing.T, settings map[string]any, target string, isServicePrincipal bool, build bool, tempDir string) { + ctx := context.Background() + + templatePath, err := prepareBuiltinTemplates("default-python", tempDir) + require.NoError(t, err) + + w := &databricks.WorkspaceClient{ + Config: &workspaceConfig.Config{Host: "https://myhost.com"}, + } + + // Prepare helpers + cachedUser = &iam.User{UserName: "user@domain.com"} + cachedIsServicePrincipal = &isServicePrincipal + ctx = root.SetWorkspaceClient(ctx, w) + helpers := 
loadHelpers(ctx) + + renderer, err := newRenderer(ctx, settings, helpers, templatePath, "./testdata/template-in-path/library", tempDir) + require.NoError(t, err) + + // Evaluate template + err = renderer.walk() + require.NoError(t, err) + err = renderer.persistToDisk() + require.NoError(t, err) + b, err := bundle.Load(ctx, filepath.Join(tempDir, "template", "my_project")) + require.NoError(t, err) + + // Apply initialize / validation mutators + b.Config.Workspace.CurrentUser = &bundleConfig.User{User: cachedUser} + b.WorkspaceClient() + b.Config.Bundle.Terraform = &bundleConfig.Terraform{ + ExecPath: "sh", + } + err = bundle.Apply(ctx, b, bundle.Seq( + bundle.Seq(mutator.DefaultMutators()...), + mutator.SelectTarget(target), + phases.Initialize(), + )) + require.NoError(t, err) + + // Apply build mutator + if build { + err = bundle.Apply(ctx, b, phases.Build()) + require.NoError(t, err) + } +} + +func TestBuiltinTemplateValid(t *testing.T) { + // Test option combinations + options := []string{"yes", "no"} + isServicePrincipal := false + build := false + for _, includeNotebook := range options { + for _, includeDlt := range options { + for _, includePython := range options { + for _, isServicePrincipal := range []bool{true, false} { + config := map[string]any{ + "project_name": "my_project", + "include_notebook": includeNotebook, + "include_dlt": includeDlt, + "include_python": includePython, + } + tempDir := t.TempDir() + assertBuiltinTemplateValid(t, config, "dev", isServicePrincipal, build, tempDir) + } + } + } + } + + // Test prod mode + build + config := map[string]any{ + "project_name": "my_project", + "include_notebook": "yes", + "include_dlt": "yes", + "include_python": "yes", + } + isServicePrincipal = false + build = true + + // On Windows, we can't always remove the resulting temp dir since background + // processes might have it open, so we use 'defer' for a best-effort cleanup + tempDir, err := os.MkdirTemp("", "templates") + require.NoError(t, err) + defer os.RemoveAll(tempDir) + + assertBuiltinTemplateValid(t, config, "prod", isServicePrincipal, build, tempDir) + defer os.RemoveAll(tempDir) +} + func TestRendererWithAssociatedTemplateInLibrary(t *testing.T) { tmpDir := t.TempDir() diff --git a/libs/template/templates/default-python/databricks_template_schema.json b/libs/template/templates/default-python/databricks_template_schema.json index 3220e9a6..22c65f30 100644 --- a/libs/template/templates/default-python/databricks_template_schema.json +++ b/libs/template/templates/default-python/databricks_template_schema.json @@ -3,7 +3,32 @@ "project_name": { "type": "string", "default": "my_project", - "description": "Unique name for this project" + "description": "Unique name for this project", + "order": 1 + }, + "include_notebook": { + "todo": "use an enum here, see https://github.com/databricks/cli/pull/668", + "type": "string", + "default": "yes", + "pattern": "^(yes|no)$", + "description": "Include a stub (sample) notebook in 'my_project/src'", + "order": 2 + }, + "include_dlt": { + "todo": "use an enum here, see https://github.com/databricks/cli/pull/668", + "type": "string", + "default": "yes", + "pattern": "^(yes|no)$", + "description": "Include a stub (sample) DLT pipeline in 'my_project/src'", + "order": 3 + }, + "include_python": { + "todo": "use an enum here, see https://github.com/databricks/cli/pull/668", + "type": "string", + "default": "yes", + "pattern": "^(yes|no)$", + "description": "Include a stub (sample) Python package 'my_project/src'", + "order": 4 } } } 
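The three new schema properties are plain strings constrained to `yes`/`no` by their `pattern` (the `todo` fields note that a proper enum would be preferable). As a rough, standalone sketch of the contract they impose — `resolveOption` below is a hypothetical helper, not part of the CLI — defaulting and validation amount to:

```go
// Hypothetical sketch (not CLI code): each of the new template options is a
// string that must match the schema's "^(yes|no)$" pattern, with the value
// from defaults.json used when nothing is provided.
package main

import (
	"fmt"
	"regexp"
)

var yesNo = regexp.MustCompile(`^(yes|no)$`)

// resolveOption falls back to the schema default and enforces the pattern.
func resolveOption(name, value, defaultValue string) (string, error) {
	if value == "" {
		value = defaultValue
	}
	if !yesNo.MatchString(value) {
		return "", fmt.Errorf("%s: %q does not match ^(yes|no)$", name, value)
	}
	return value, nil
}

func main() {
	for _, opt := range []string{"include_notebook", "include_dlt", "include_python"} {
		v, err := resolveOption(opt, "", "yes")
		if err != nil {
			panic(err)
		}
		fmt.Printf("%s = %s\n", opt, v)
	}
}
```

Since all three defaults are "yes", a run that supplies no answers still produces a project with every stub included.
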
diff --git a/libs/template/templates/default-python/defaults.json b/libs/template/templates/default-python/defaults.json
index 99ecd36d..510ec4a3 100644
--- a/libs/template/templates/default-python/defaults.json
+++ b/libs/template/templates/default-python/defaults.json
@@ -1,3 +1,6 @@
 {
-    "project_name": "my_project"
+    "project_name": "my_project",
+    "include_notebook": "yes",
+    "include_dlt": "yes",
+    "include_python": "yes"
 }
diff --git a/libs/template/templates/default-python/template/__preamble.tmpl b/libs/template/templates/default-python/template/__preamble.tmpl
new file mode 100644
index 00000000..c018f282
--- /dev/null
+++ b/libs/template/templates/default-python/template/__preamble.tmpl
@@ -0,0 +1,38 @@
+# Preamble
+
+This file contains only template directives; it is skipped for the actual output.
+
+{{skip "__preamble"}}
+
+{{ $value := .project_name }}
+{{with (regexp "^[A-Za-z0-9_]*$")}}
+  {{if not (.MatchString $value)}}
+    {{fail "Invalid project_name: %s. Must consist of letters, numbers, and underscores only." $value}}
+  {{end}}
+{{end}}
+
+{{$notDLT := not (eq .include_dlt "yes")}}
+{{$notNotebook := not (eq .include_notebook "yes")}}
+{{$notPython := not (eq .include_python "yes")}}
+
+{{if $notPython}}
+  {{skip "{{.project_name}}/src/{{.project_name}}"}}
+  {{skip "{{.project_name}}/tests/main_test.py"}}
+  {{skip "{{.project_name}}/setup.py"}}
+  {{skip "{{.project_name}}/pytest.ini"}}
+{{end}}
+
+{{if $notDLT}}
+  {{skip "{{.project_name}}/src/dlt_pipeline.ipynb"}}
+  {{skip "{{.project_name}}/resources/{{.project_name}}_pipeline.yml"}}
+{{end}}
+
+{{if $notNotebook}}
+  {{skip "{{.project_name}}/src/notebook.ipynb"}}
+{{end}}
+
+{{if (and $notDLT $notNotebook $notPython)}}
+  {{skip "{{.project_name}}/resources/{{.project_name}}_job.yml"}}
+{{else}}
+  {{skip "{{.project_name}}/resources/.gitkeep"}}
+{{end}}
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl
index 4c89435b..7c8876e7 100644
--- a/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl
+++ b/libs/template/templates/default-python/template/{{.project_name}}/README.md.tmpl
@@ -28,10 +28,17 @@ The '{{.project_name}}' project was generated by using the default-python templa
    $ databricks bundle deploy --target prod
    ```
 
-5. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
-   https://docs.databricks.com/dev-tools/vscode-ext.html. Or read the "getting started" documentation for
-   **Databricks Connect** for instructions on running the included Python code from a different IDE.
+5. To run a job or pipeline, use the "run" command:
+   ```
+   $ databricks bundle run {{.project_name}}_job
+   ```
 
-6. For documentation on the Databricks asset bundles format used
+6. Optionally, install developer tools such as the Databricks extension for Visual Studio Code from
+   https://docs.databricks.com/dev-tools/vscode-ext.html.
+{{- if (eq .include_python "yes") }} Or read the "getting started" documentation for
+   **Databricks Connect** for instructions on running the included Python code from a different IDE.
+{{- end}}
+
+7. For documentation on the Databricks asset bundles format used
    for this project, and for CI/CD configuration, see
    https://docs.databricks.com/dev-tools/bundles/index.html.
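The `__preamble.tmpl` above hands `{{skip}}` patterns that themselves contain template syntax, such as `"{{.project_name}}/setup.py"`; this works because the renderer change earlier in this patch executes the skip argument as a template before recording it. A minimal, self-contained sketch of that two-stage expansion (illustrative only — the real implementation lives in `libs/template/renderer.go` and additionally joins the pattern with the calling file's directory):

```go
package main

import (
	"fmt"
	"io"
	"strings"
	"text/template"
)

func main() {
	values := map[string]any{
		"project_name":   "my_project",
		"include_python": "no",
	}

	var skipPatterns []string

	// expand renders a pattern that may itself contain template syntax,
	// e.g. "{{.project_name}}/setup.py" -> "my_project/setup.py".
	expand := func(raw string) (string, error) {
		t, err := template.New("pattern").Parse(raw)
		if err != nil {
			return "", err
		}
		var sb strings.Builder
		if err := t.Execute(&sb, values); err != nil {
			return "", err
		}
		return sb.String(), nil
	}

	funcs := template.FuncMap{
		"skip": func(relPattern string) (string, error) {
			pattern, err := expand(relPattern)
			if err != nil {
				return "", err
			}
			skipPatterns = append(skipPatterns, pattern)
			return "", nil // print nothing at the call site
		},
	}

	preamble := `{{if eq .include_python "no"}}{{skip "{{.project_name}}/setup.py"}}{{end}}`
	tmpl := template.Must(template.New("preamble").Funcs(funcs).Parse(preamble))
	if err := tmpl.Execute(io.Discard, values); err != nil {
		panic(err)
	}

	fmt.Println(skipPatterns) // [my_project/setup.py]
}
```

Running this prints the pattern with the project name already substituted, which is what lets one preamble file drive skipping for any project name.
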
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/.gitkeep b/libs/template/templates/default-python/template/{{.project_name}}/resources/.gitkeep new file mode 100644 index 00000000..3e09c14c --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/.gitkeep @@ -0,0 +1 @@ +This folder is reserved for Databricks Asset Bundles resource definitions. diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl index f8116cdf..1792f947 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_job.yml.tmpl @@ -1,6 +1,5 @@ # The main job for {{.project_name}} resources: - jobs: {{.project_name}}_job: name: {{.project_name}}_job @@ -10,20 +9,41 @@ resources: timezone_id: Europe/Amsterdam {{- if not is_service_principal}} + email_notifications: on_failure: - {{user_name}} + + {{else}} + {{end -}} tasks: + {{- if eq .include_notebook "yes" }} - task_key: notebook_task job_cluster_key: job_cluster notebook_task: notebook_path: ../src/notebook.ipynb - - - task_key: python_wheel_task + {{end -}} + {{- if (eq .include_dlt "yes") }} + - task_key: refresh_pipeline + {{- if (eq .include_notebook "yes" )}} depends_on: - task_key: notebook_task + {{- end}} + pipeline_task: + {{- /* TODO: we should find a way that doesn't use magics for the below, like ./{{project_name}}_pipeline.yml */}} + pipeline_id: ${resources.pipelines.{{.project_name}}_pipeline.id} + {{end -}} + {{- if (eq .include_python "yes") }} + - task_key: main_task + {{- if (eq .include_dlt "yes") }} + depends_on: + - task_key: refresh_pipeline + {{- else if (eq .include_notebook "yes" )}} + depends_on: + - task_key: notebook_task + {{end}} job_cluster_key: job_cluster python_wheel_task: package_name: {{.project_name}} @@ -31,6 +51,8 @@ resources: libraries: - whl: ../dist/*.whl + {{else}} + {{end -}} job_clusters: - job_cluster_key: job_cluster new_cluster: diff --git a/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl new file mode 100644 index 00000000..ffe400cb --- /dev/null +++ b/libs/template/templates/default-python/template/{{.project_name}}/resources/{{.project_name}}_pipeline.yml.tmpl @@ -0,0 +1,12 @@ +# The main pipeline for {{.project_name}} +resources: + pipelines: + {{.project_name}}_pipeline: + name: "{{.project_name}}_pipeline" + target: "{{.project_name}}_${bundle.environment}" + libraries: + - notebook: + path: ../src/dlt_pipeline.ipynb + + configuration: + "bundle.sourcePath": "/Workspace/${workspace.file_path}/src" diff --git a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl similarity index 84% rename from libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb rename to libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl index 2ee36c3c..04bb261c 100644 --- 
a/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb
+++ b/libs/template/templates/default-python/template/{{.project_name}}/scratch/exploration.ipynb.tmpl
@@ -17,11 +17,15 @@
    },
    "outputs": [],
    "source": [
+    {{- if (eq .include_python "yes") }}
     "import sys\n",
     "sys.path.append('../src')\n",
-    "from project import main\n",
+    "from {{.project_name}} import main\n",
     "\n",
-    "main.taxis.show(10)"
+    "main.get_taxis().show(10)"
+    {{else}}
+    "spark.range(10)"
+    {{end -}}
    ]
   }
  ],
diff --git a/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl
new file mode 100644
index 00000000..74893238
--- /dev/null
+++ b/libs/template/templates/default-python/template/{{.project_name}}/src/dlt_pipeline.ipynb.tmpl
@@ -0,0 +1,112 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "application/vnd.databricks.v1+cell": {
+     "cellMetadata": {},
+     "inputWidgets": {},
+     "nuid": "9a626959-61c8-4bba-84d2-2a4ecab1f7ec",
+     "showTitle": false,
+     "title": ""
+    }
+   },
+   "source": [
+    "# DLT pipeline\n",
+    "\n",
+    "This Delta Live Tables (DLT) definition is executed using a pipeline defined in resources/{{.project_name}}_pipeline.yml."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "application/vnd.databricks.v1+cell": {
+     "cellMetadata": {},
+     "inputWidgets": {},
+     "nuid": "9198e987-5606-403d-9f6d-8f14e6a4017f",
+     "showTitle": false,
+     "title": ""
+    },
+    "jupyter": {
+     {{- /* Collapse this cell by default. Just boring imports here! */}}
+     "source_hidden": true
+    }
+   },
+   "outputs": [],
+   "source": [
+    {{- if (eq .include_python "yes") }}
+    "# Import DLT and make sure '{{.project_name}}' is on the Python path\n",
+    "import dlt\n",
+    "from pyspark.sql.functions import expr\n",
+    "from pyspark.sql import SparkSession\n",
+    "spark = SparkSession.builder.getOrCreate()\n",
+    "import sys\n",
+    "try:\n",
+    " sys.path.append(spark.conf.get(\"bundle.sourcePath\"))\n",
+    "except:\n",
+    " pass\n",
+    "from {{.project_name}} import main"
+    {{else}}
+    "# Import DLT\n",
+    "import dlt\n",
+    "from pyspark.sql.functions import expr\n",
+    "from pyspark.sql import SparkSession\n",
+    "spark = SparkSession.builder.getOrCreate()"
+    {{end -}}
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 0,
+   "metadata": {
+    "application/vnd.databricks.v1+cell": {
+     "cellMetadata": {},
+     "inputWidgets": {},
+     "nuid": "3fc19dba-61fd-4a89-8f8c-24fee63bfb14",
+     "showTitle": false,
+     "title": ""
+    }
+   },
+   "outputs": [],
+   "source": [
+    {{- if (eq .include_python "yes") }}
+    "@dlt.view\n",
+    "def taxi_raw():\n",
+    " return main.get_taxis()\n",
+    {{else}}
+    "\n",
+    "@dlt.view\n",
+    "def taxi_raw():\n",
+    " return spark.read.format(\"json\").load(\"/databricks-datasets/nyctaxi/sample/json/\")\n",
+    {{end -}}
+    "\n",
+    "@dlt.table\n",
+    "def filtered_taxis():\n",
+    " return dlt.read(\"taxi_raw\").filter(expr(\"fare_amount < 30\"))"
+   ]
+  }
+ ],
+ "metadata": {
+  "application/vnd.databricks.v1+notebook": {
+   "dashboards": [],
+   "language": "python",
+   "notebookMetadata": {
+    "pythonIndentUnit": 2
+   },
+   "notebookName": "dlt_pipeline",
+   "widgets": {}
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.11.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git 
a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl index 26c74303..8423ecf8 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/src/notebook.ipynb.tmpl @@ -34,9 +34,13 @@ }, "outputs": [], "source": [ + {{- if (eq .include_python "yes") }} "from {{.project_name}} import main\n", "\n", - "main.get_taxis().show(10)\n" + "main.get_taxis().show(10)" + {{else}} + "spark.range(10)" + {{end -}} ] } ], diff --git a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl index 92afccc6..f1750046 100644 --- a/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl +++ b/libs/template/templates/default-python/template/{{.project_name}}/tests/main_test.py.tmpl @@ -2,4 +2,4 @@ from {{.project_name}} import main def test_main(): taxis = main.get_taxis() - assert taxis.count() == 5 + assert taxis.count() > 5
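One design note on the `libs/template/helpers.go` change above: `user_name` and `is_service_principal` now memoize the current user in the package-level `cachedUser` / `cachedIsServicePrincipal` variables, which is what allows the new renderer test to pre-populate them and run entirely offline. A minimal sketch of that memoization pattern (an assumed simplification — the real helper fetches the user through the Databricks SDK's `w.CurrentUser.Me`):

```go
package main

import (
	"errors"
	"fmt"
)

type user struct {
	UserName string
	Id       string
}

// Package-level cache, mirroring cachedUser in libs/template/helpers.go:
// tests can assign it directly so no API call is needed.
var cachedUser *user

// currentUser returns the cached user, fetching it at most once.
func currentUser(fetch func() (*user, error)) (*user, error) {
	if cachedUser != nil {
		return cachedUser, nil
	}
	u, err := fetch()
	if err != nil {
		return nil, err
	}
	cachedUser = u
	return cachedUser, nil
}

func main() {
	// As in the template test: seed the cache up front.
	cachedUser = &user{UserName: "user@domain.com", Id: "1234"}

	u, err := currentUser(func() (*user, error) {
		return nil, errors.New("unexpected network call")
	})
	if err != nil {
		panic(err)
	}
	fmt.Println(u.UserName) // user@domain.com
}
```
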