Added support to generate Git based jobs (#2304)

## Changes
This will generate bundle YAML configuration for Git-based jobs but
won't download any related files, as they are located in the Git repository.

Fixes #1423 

## Tests
Added unit test

---------

Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
This commit is contained in:
Andrew Nester 2025-02-07 18:37:03 +00:00 committed by GitHub
parent 2a97dcaa45
commit f8aaa7fce3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 165 additions and 6 deletions

View File

@ -0,0 +1,2 @@
bundle:
name: git_job

View File

@ -0,0 +1,17 @@
resources:
jobs:
out:
name: gitjob
tasks:
- task_key: test_task
notebook_task:
notebook_path: some/test/notebook.py
- task_key: test_task_2
notebook_task:
notebook_path: /Workspace/Users/foo@bar.com/some/test/notebook.py
source: WORKSPACE
git_source:
git_branch: main
git_commit: abcdef
git_provider: github
git_url: https://git.databricks.com

View File

@ -0,0 +1,2 @@
Job is using Git source, skipping downloading files
Job configuration successfully saved to out.job.yml

View File

@ -0,0 +1 @@
$CLI bundle generate job --existing-job-id 1234 --config-dir . --key out

View File

@ -0,0 +1,33 @@
LocalOnly = true # This test needs to run against stubbed Databricks API

# Stub for the Jobs Get API: returns a job that uses a Git source, with one
# task additionally overriding it via "source": "WORKSPACE". The generate
# command is expected to skip downloading notebook files for such jobs.
# NOTE(review): the JSON below is inside a TOML literal string — do not add
# comments within it, as they would become part of the response body.
[[Server]]
Pattern = "GET /api/2.1/jobs/get"
Response.Body = '''
{
"job_id": 11223344,
"settings": {
"name": "gitjob",
"git_source": {
"git_url": "https://git.databricks.com",
"git_provider": "github",
"git_branch": "main",
"git_commit": "abcdef"
},
"tasks": [
{
"task_key": "test_task",
"notebook_task": {
"notebook_path": "some/test/notebook.py"
}
},
{
"task_key": "test_task_2",
"notebook_task": {
"source": "WORKSPACE",
"notebook_path": "/Workspace/Users/foo@bar.com/some/test/notebook.py"
}
}
]
}
}
'''

View File

@ -13,7 +13,6 @@ var (
func ConvertJobToValue(job *jobs.Job) (dyn.Value, error) {
value := make(map[string]dyn.Value)
if job.Settings.Tasks != nil {
tasks := make([]dyn.Value, 0)
for _, task := range job.Settings.Tasks {

View File

@ -50,10 +50,22 @@ func NewGenerateJobCommand() *cobra.Command {
}
downloader := newDownloader(w, sourceDir, configDir)
for _, task := range job.Settings.Tasks {
err := downloader.MarkTaskForDownload(ctx, &task)
if err != nil {
return err
// Don't download files if the job is using Git source
// When Git source is used, the job will be using the files from the Git repository
// but specific tasks might override this behaviour by using `source: WORKSPACE` setting.
// In this case, we don't want to download the files as well for these specific tasks
// because it leads to confusion with relative paths between workspace and GIT files.
// Instead we keep these tasks as is and let the user handle the files manually.
// The configuration will be deployable as tasks paths for source: WORKSPACE tasks will be absolute workspace paths.
if job.Settings.GitSource != nil {
cmdio.LogString(ctx, "Job is using Git source, skipping downloading files")
} else {
for _, task := range job.Settings.Tasks {
err := downloader.MarkTaskForDownload(ctx, &task)
if err != nil {
return err
}
}
}

View File

@ -22,9 +22,50 @@ func ConvertToMapValue(strct any, order *Order, skipFields []string, dst map[str
return dyn.InvalidValue, fmt.Errorf("expected map, got %s", mv.Kind())
}
mv, err = sortMapAlphabetically(mv)
if err != nil {
return dyn.InvalidValue, err
}
return skipAndOrder(mv, order, skipFields, dst)
}
// sortMapAlphabetically returns a copy of the given map value whose keys are
// ordered alphabetically, recursing into nested maps. This produces stable
// output for generated YAML files.
func sortMapAlphabetically(mv dyn.Value) (dyn.Value, error) {
	src := mv.MustMap()
	sortedKeys := src.Keys()
	slices.SortStableFunc(sortedKeys, func(a, b dyn.Value) int {
		left := a.MustString()
		right := b.MustString()
		switch {
		case left < right:
			return -1
		case left > right:
			return 1
		default:
			return 0
		}
	})
	out := dyn.NewMapping()
	for _, k := range sortedKeys {
		// The key was obtained from src.Keys(), so the lookup cannot miss.
		v, _ := src.Get(k)
		if v.Kind() == dyn.KindMap {
			sorted, err := sortMapAlphabetically(v)
			if err != nil {
				return dyn.InvalidValue, err
			}
			v = sorted
		}
		if err := out.Set(k, v); err != nil {
			return dyn.InvalidValue, err
		}
	}
	return dyn.V(out), nil
}
func skipAndOrder(mv dyn.Value, order *Order, skipFields []string, dst map[string]dyn.Value) (dyn.Value, error) {
for _, pair := range mv.MustMap().Pairs() {
k := pair.Key.MustString()
@ -44,7 +85,11 @@ func skipAndOrder(mv dyn.Value, order *Order, skipFields []string, dst map[strin
continue
}
dst[k] = dyn.NewValue(v.Value(), []dyn.Location{{Line: order.Get(k)}})
if order == nil {
dst[k] = v
} else {
dst[k] = dyn.NewValue(v.Value(), []dyn.Location{{Line: order.Get(k)}})
}
}
return dyn.V(dst), nil

View File

@ -7,6 +7,54 @@ import (
assert "github.com/databricks/cli/libs/dyn/dynassert"
)
// TestConvertToMap verifies that ConvertToMapValue, called without an explicit
// order, emits keys alphabetically and drops fields listed in skipFields.
func TestConvertToMap(t *testing.T) {
	type test struct {
		Name            string            `json:"name"`
		Map             map[string]string `json:"map"`
		List            []string          `json:"list"`
		LongNameField   string            `json:"long_name_field"`
		ForceSendFields []string          `json:"-"`
		Format          string            `json:"format"`
	}

	input := &test{
		Name: "test",
		Map: map[string]string{
			"key2": "value2",
			"key1": "value1",
		},
		List:            []string{"a", "b", "c"},
		ForceSendFields: []string{"Name"},
		LongNameField:   "long name goes here",
	}

	// Expected output: alphabetical key order, "format" skipped, and no
	// location information attached (order is nil).
	expected := dyn.V(map[string]dyn.Value{
		"list": dyn.NewValue(
			[]dyn.Value{dyn.V("a"), dyn.V("b"), dyn.V("c")},
			[]dyn.Location{},
		),
		"long_name_field": dyn.NewValue("long name goes here", []dyn.Location{}),
		"map": dyn.NewValue(
			map[string]dyn.Value{
				"key1": dyn.V("value1"),
				"key2": dyn.V("value2"),
			},
			[]dyn.Location{},
		),
		"name": dyn.NewValue("test", []dyn.Location{}),
	})

	result, err := ConvertToMapValue(input, nil, []string{"format"}, map[string]dyn.Value{})
	assert.NoError(t, err)
	assert.Equal(t, expected, result)
}
func TestConvertToMapValueWithOrder(t *testing.T) {
type test struct {
Name string `json:"name"`