package python

import (
	"encoding/json"
	"fmt"
	"io"
	"path/filepath"

	"github.com/databricks/cli/libs/dyn"
)

// generatedFileName is the virtual file name used for YAML generated by Python code.
//
// Locations whose file base name equals generatedFileName are treated as virtual:
// mergePythonLocations drops them and puts the location looked up in pythonLocations
// (loaded from locations.json) first instead.
const generatedFileName = "__generated_by_python__.yml"

// pythonLocations is a data structure for efficient location lookup for a given path.
//
// Locations form a tree, and we assign the location of the closest ancestor to each dyn.Value based on its path.
// We implement it as a trie (prefix tree) where keys are components of the path. With that, lookups are O(n)
// where n is the number of components in the path.
//
// For example, with locations.json:
//
//		{"path": "resources.jobs.job_0", "file": "resources/job_0.py", "line": 3, "column": 5}
//		{"path": "resources.jobs.job_0.tasks[0].task_key", "file": "resources/job_0.py", "line": 10, "column": 5}
//		{"path": "resources.jobs.job_1", "file": "resources/job_1.py", "line": 5, "column": 7}
//
//	- resources.jobs.job_0.tasks[0].task_key is located at job_0.py:10:5
//
//	- resources.jobs.job_0.tasks[0].email_notifications is located at job_0.py:3:5,
//	  because we use the location of the job as the most precise approximation.
//
// See pythonLocationEntry for the structure of a single entry in locations.json.
type pythonLocations struct {
	// descendants referenced by key, e.g. '.foo'
	keys map[string]*pythonLocations

	// descendants referenced by index, e.g. '[0]'
	indexes map[int]*pythonLocations

	// location for the current node; only meaningful when exists is true
	location dyn.Location

	// if true, location is present (set by putPythonLocation for the exact path of this node)
	exists bool
}

// pythonLocationEntry is a single entry in locations.json.
//
// parsePythonLocations decodes each JSON object of the file into this structure
// and converts it into a dyn.Location stored under Path.
type pythonLocationEntry struct {
	// Path is the path of the value, e.g. "resources.jobs.job_0.tasks[0].task_key";
	// it is parsed with dyn.NewPathFromString.
	Path string `json:"path"`
	// File is the source file the value is attributed to.
	File string `json:"file"`
	// Line is the line number within File.
	Line int `json:"line"`
	// Column is the column number within File.
	Column int `json:"column"`
}

// mergePythonLocations rewrites the locations of every node in value using the
// locations reported by the Python mutator.
//
// It is primarily used to combine locations.json with output.json, so that
// validation errors point at Python source code rather than at generated YAML.
//
// Nodes for which neither the exact path nor any ancestor has a known location
// are returned unchanged.
func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) {
	return dyn.Walk(value, func(p dyn.Path, v dyn.Value) (dyn.Value, error) {
		pythonLocation, found := findPythonLocation(locations, p)
		if !found {
			return v, nil
		}

		// Locations that refer to the virtual generated file are dropped, because
		// pythonLocation supersedes them. Any remaining (non-virtual) locations are
		// kept, which covers the case of a Python function modifying a resource
		// that was originally defined in YAML.
		preserved := removeVirtualLocations(v.Locations())

		// The first element is the "last" location, i.e. the one used for error
		// reporting, so the Python location goes first.
		merged := make([]dyn.Location, 0, len(preserved)+1)
		merged = append(merged, pythonLocation)
		merged = append(merged, preserved...)

		return v.WithLocations(merged), nil
	})
}

// removeVirtualLocations returns the given locations without those whose file
// base name is generatedFileName. The relative order of the remaining locations
// is preserved; the result is nil when nothing is kept.
func removeVirtualLocations(locations []dyn.Location) []dyn.Location {
	var kept []dyn.Location

	for i := range locations {
		if filepath.Base(locations[i].File) != generatedFileName {
			kept = append(kept, locations[i])
		}
	}

	return kept
}

// parsePythonLocations parses locations.json from the Python mutator.
//
// The locations file is a stream of newline-separated JSON objects, each having
// the pythonLocationEntry structure. Every entry is inserted into a trie keyed by
// its path; if the same path occurs more than once, the last entry wins.
//
// An error is returned if an entry cannot be decoded as JSON or if its path is
// not a valid dyn path. The underlying error is wrapped, so callers can inspect
// it with errors.Is / errors.As.
func parsePythonLocations(input io.Reader) (*pythonLocations, error) {
	decoder := json.NewDecoder(input)
	locations := newPythonLocations()

	for decoder.More() {
		// Declared per iteration so that fields missing from one entry never
		// inherit values decoded for a previous entry.
		var entry pythonLocationEntry

		if err := decoder.Decode(&entry); err != nil {
			return nil, fmt.Errorf("failed to parse python location: %w", err)
		}

		path, err := dyn.NewPathFromString(entry.Path)
		if err != nil {
			return nil, fmt.Errorf("failed to parse python location: %w", err)
		}

		location := dyn.Location{
			File:   entry.File,
			Line:   entry.Line,
			Column: entry.Column,
		}

		putPythonLocation(locations, path, location)
	}

	return locations, nil
}

// putPythonLocation stores location in the trie under the given path.
//
// Intermediate nodes are created on demand. A location previously stored for
// exactly the same path is overwritten.
func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) {
	node := trie

	for _, part := range path {
		// A component with a non-empty key is a map key; otherwise it is treated
		// as a sequence index.
		if name := part.Key(); name != "" {
			child, found := node.keys[name]
			if !found {
				child = newPythonLocations()
				node.keys[name] = child
			}

			node = child
			continue
		}

		idx := part.Index()
		child, found := node.indexes[idx]
		if !found {
			child = newPythonLocations()
			node.indexes[idx] = child
		}

		node = child
	}

	node.exists = true
	node.location = location
}

// newPythonLocations returns an empty trie node with both child maps
// initialized and no location set.
func newPythonLocations() *pythonLocations {
	node := new(pythonLocations)
	node.keys = map[string]*pythonLocations{}
	node.indexes = map[int]*pythonLocations{}

	return node
}

// findPythonLocation looks up the location for path in the trie.
//
// It returns the location stored for the path itself or, failing that, for its
// closest ancestor that has one (the root included). The boolean result is false
// when neither the path nor any of its ancestors has a location.
func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) {
	node := locations
	best, found := locations.location, locations.exists

	for _, part := range path {
		var (
			child *pythonLocations
			ok    bool
		)

		// A component with a non-empty key is a map key; otherwise it is treated
		// as a sequence index.
		if name := part.Key(); name != "" {
			child, ok = node.keys[name]
		} else {
			child, ok = node.indexes[part.Index()]
		}

		// The trie has nothing deeper along this path: keep the best match so far.
		if !ok {
			break
		}

		node = child
		if node.exists {
			best, found = node.location, true
		}
	}

	return best, found
}