package python import ( "encoding/json" "fmt" "io" pathlib "path" "path/filepath" "github.com/databricks/cli/libs/dyn" ) // generatedFileName is used as the virtual file name for YAML generated by Python code. // // mergePythonLocations replaces dyn.Location with generatedFileName with locations loaded // from locations.json const generatedFileName = "__generated_by_python__.yml" // pythonLocations is data structure for efficient location lookup for a given path // // Locations form a tree, and we assign locations of the closest ancestor to each dyn.Value based on its path. // We implement it as a trie (prefix tree) where keys are components of the path. With that, lookups are O(n) // where n is the number of components in the path. // // For example, with locations.json: // // {"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5} // {"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5} // {"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7} // // - resources.jobs.job_0.tasks[0].task_key is located at job_0.py:10:5 // // - resources.jobs.job_0.tasks[0].email_notifications is located at job_0.py:3:5, // because we use the location of the job as the most precise approximation. // // See pythonLocationEntry for the structure of a single entry in locations.json type pythonLocations struct { // descendants referenced by index, e.g. '.foo' keys map[string]*pythonLocations // descendants referenced by key, e.g. '[0]' indexes map[int]*pythonLocations // location for the current node if it exists location dyn.Location // if true, location is present exists bool } // pythonLocationEntry is a single entry in locations.json type pythonLocationEntry struct { Path string `json:"path"` File string `json:"file"` Line int `json:"line"` Column int `json:"column"` } // mergePythonLocations applies locations from Python mutator into given dyn.Value // // The primary use-case is to merge locations.json with output.json, so that any // validation errors will point to Python source code instead of generated YAML. func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) { return dyn.Walk(value, func(path dyn.Path, value dyn.Value) (dyn.Value, error) { newLocation, ok := findPythonLocation(locations, path) if !ok { return value, nil } // The first item in the list is the "last" location used for error reporting // // Loaded YAML uses virtual file path as location, we remove any of such references, // because they should use 'newLocation' instead. // // We preserve any previous non-virtual locations in case when Python function modified // resource defined in YAML. newLocations := append( []dyn.Location{newLocation}, removeVirtualLocations(value.Locations())..., ) return value.WithLocations(newLocations), nil }) } func removeVirtualLocations(locations []dyn.Location) []dyn.Location { var newLocations []dyn.Location for _, location := range locations { if filepath.Base(location.File) == generatedFileName { continue } newLocations = append(newLocations, location) } return newLocations } // parsePythonLocations parses locations.json from the Python mutator. // // locations file is newline-separated JSON objects with pythonLocationEntry structure. func parsePythonLocations(bundleRoot string, input io.Reader) (*pythonLocations, error) { decoder := json.NewDecoder(input) locations := newPythonLocations() for decoder.More() { var entry pythonLocationEntry err := decoder.Decode(&entry) if err != nil { return nil, fmt.Errorf("failed to parse python location: %s", err) } path, err := dyn.NewPathFromString(entry.Path) if err != nil { return nil, fmt.Errorf("failed to parse python location: %s", err) } // Output can contain both relative paths and absolute paths outside of bundle root. // Mutator pipeline expects all path to be absolute at this point, so make all paths absolute. if !pathlib.IsAbs(entry.File) { entry.File = filepath.Join(bundleRoot, entry.File) } location := dyn.Location{ File: entry.File, Line: entry.Line, Column: entry.Column, } putPythonLocation(locations, path, location) } return locations, nil } // putPythonLocation puts the location to the trie for the given path func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) { currentNode := trie for _, component := range path { if key := component.Key(); key != "" { if _, ok := currentNode.keys[key]; !ok { currentNode.keys[key] = newPythonLocations() } currentNode = currentNode.keys[key] } else { index := component.Index() if _, ok := currentNode.indexes[index]; !ok { currentNode.indexes[index] = newPythonLocations() } currentNode = currentNode.indexes[index] } } currentNode.location = location currentNode.exists = true } // newPythonLocations creates a new trie node func newPythonLocations() *pythonLocations { return &pythonLocations{ keys: make(map[string]*pythonLocations), indexes: make(map[int]*pythonLocations), } } // findPythonLocation finds the location or closest ancestor location in the trie for the given path // if no ancestor or exact location is found, false is returned. func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) { currentNode := locations lastLocation := locations.location exists := locations.exists for _, component := range path { if key := component.Key(); key != "" { if _, ok := currentNode.keys[key]; !ok { break } currentNode = currentNode.keys[key] } else { index := component.Index() if _, ok := currentNode.indexes[index]; !ok { break } currentNode = currentNode.indexes[index] } if currentNode.exists { lastLocation = currentNode.location exists = true } } return lastLocation, exists }