databricks-cli/bundle/deploy/terraform/init.go

304 lines
9.2 KiB
Go
Raw Normal View History

2022-12-15 14:12:47 +00:00
package terraform
import (
"context"
"errors"
"fmt"
"io/fs"
"os"
2022-12-15 14:12:47 +00:00
"os/exec"
"path/filepath"
"runtime"
"strings"
"time"
2022-12-15 14:12:47 +00:00
"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
Add `bundle debug terraform` command (#1294) - Add `bundle debug terraform` command. It prints versions of the Terraform and the Databricks Terraform provider. In the text mode it also explains how to setup the CLI in environments with restricted internet access. - Use `DATABRICKS_TF_EXEC_PATH` env var to point Databricks CLI to the Terraform binary. The CLI only uses it if `DATABRICKS_TF_VERSION` matches the currently used terraform version. - Use `DATABRICKS_TF_CLI_CONFIG_FILE` env var to point Terraform CLI config that points to the filesystem mirror for the Databricks provider. The CLI only uses it if `DATABRICKS_TF_PROVIDER_VERSION` matches the currently used provider version. Relevant PR on the VSCode extension side: https://github.com/databricks/databricks-vscode/pull/1147 Example output of the `databricks bundle debug terraform`: ``` Terraform version: 1.5.5 Terraform URL: https://releases.hashicorp.com/terraform/1.5.5 Databricks Terraform Provider version: 1.38.0 Databricks Terraform Provider URL: https://github.com/databricks/terraform-provider-databricks/releases/tag/v1.38.0 Databricks CLI downloads its Terraform dependencies automatically. If you run the CLI in an air-gapped environment, you can download the dependencies manually and set these environment variables: DATABRICKS_TF_VERSION=1.5.5 DATABRICKS_TF_EXEC_PATH=/path/to/terraform/binary DATABRICKS_TF_PROVIDER_VERSION=1.38.0 DATABRICKS_TF_CLI_CONFIG_FILE=/path/to/terraform/cli/config.tfrc Here is an example *.tfrc configuration file: disable_checkpoint = true provider_installation { filesystem_mirror { path = "/path/to/a/folder/with/databricks/terraform/provider" } } The filesystem mirror path should point to the folder with the Databricks Terraform Provider. The folder should have this structure: /registry.terraform.io/databricks/databricks/terraform-provider-databricks_1.38.0_ARCH.zip For more information about filesystem mirrors, see the Terraform documentation: https://developer.hashicorp.com/terraform/cli/config/config-file#filesystem_mirror ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2024-04-02 12:56:27 +00:00
"github.com/databricks/cli/bundle/internal/tf/schema"
"github.com/databricks/cli/internal/build"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/env"
"github.com/databricks/cli/libs/log"
"github.com/hashicorp/hc-install/product"
"github.com/hashicorp/hc-install/releases"
2022-12-15 14:12:47 +00:00
"github.com/hashicorp/terraform-exec/tfexec"
"golang.org/x/exp/maps"
2022-12-15 14:12:47 +00:00
)
type initialize struct{}
func (m *initialize) Name() string {
return "terraform.Initialize"
}
func (m *initialize) findExecPath(ctx context.Context, b *bundle.Bundle, tf *config.Terraform) (string, error) {
// If set, pass it through [exec.LookPath] to resolve its absolute path.
if tf.ExecPath != "" {
execPath, err := exec.LookPath(tf.ExecPath)
if err != nil {
return "", err
}
tf.ExecPath = execPath
log.Debugf(ctx, "Using Terraform at %s", tf.ExecPath)
return tf.ExecPath, nil
}
Add `bundle debug terraform` command (#1294) - Add `bundle debug terraform` command. It prints versions of the Terraform and the Databricks Terraform provider. In the text mode it also explains how to setup the CLI in environments with restricted internet access. - Use `DATABRICKS_TF_EXEC_PATH` env var to point Databricks CLI to the Terraform binary. The CLI only uses it if `DATABRICKS_TF_VERSION` matches the currently used terraform version. - Use `DATABRICKS_TF_CLI_CONFIG_FILE` env var to point Terraform CLI config that points to the filesystem mirror for the Databricks provider. The CLI only uses it if `DATABRICKS_TF_PROVIDER_VERSION` matches the currently used provider version. Relevant PR on the VSCode extension side: https://github.com/databricks/databricks-vscode/pull/1147 Example output of the `databricks bundle debug terraform`: ``` Terraform version: 1.5.5 Terraform URL: https://releases.hashicorp.com/terraform/1.5.5 Databricks Terraform Provider version: 1.38.0 Databricks Terraform Provider URL: https://github.com/databricks/terraform-provider-databricks/releases/tag/v1.38.0 Databricks CLI downloads its Terraform dependencies automatically. If you run the CLI in an air-gapped environment, you can download the dependencies manually and set these environment variables: DATABRICKS_TF_VERSION=1.5.5 DATABRICKS_TF_EXEC_PATH=/path/to/terraform/binary DATABRICKS_TF_PROVIDER_VERSION=1.38.0 DATABRICKS_TF_CLI_CONFIG_FILE=/path/to/terraform/cli/config.tfrc Here is an example *.tfrc configuration file: disable_checkpoint = true provider_installation { filesystem_mirror { path = "/path/to/a/folder/with/databricks/terraform/provider" } } The filesystem mirror path should point to the folder with the Databricks Terraform Provider. The folder should have this structure: /registry.terraform.io/databricks/databricks/terraform-provider-databricks_1.38.0_ARCH.zip For more information about filesystem mirrors, see the Terraform documentation: https://developer.hashicorp.com/terraform/cli/config/config-file#filesystem_mirror ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2024-04-02 12:56:27 +00:00
// Load exec path from the environment if it matches the currently used version.
envExecPath, err := getEnvVarWithMatchingVersion(ctx, TerraformExecPathEnv, TerraformVersionEnv, TerraformVersion.String())
if err != nil {
return "", err
}
if envExecPath != "" {
tf.ExecPath = envExecPath
log.Debugf(ctx, "Using Terraform from %s at %s", TerraformExecPathEnv, tf.ExecPath)
return tf.ExecPath, nil
}
binDir, err := b.CacheDir(context.Background(), "bin")
if err != nil {
return "", err
}
// If the execPath already exists, return it.
execPath := filepath.Join(binDir, product.Terraform.BinaryName())
_, err = os.Stat(execPath)
if err != nil && !errors.Is(err, fs.ErrNotExist) {
return "", err
}
if err == nil {
tf.ExecPath = execPath
log.Debugf(ctx, "Using Terraform at %s", tf.ExecPath)
return tf.ExecPath, nil
}
// Download Terraform to private bin directory.
installer := &releases.ExactVersion{
Product: product.Terraform,
Add `bundle debug terraform` command (#1294) - Add `bundle debug terraform` command. It prints versions of the Terraform and the Databricks Terraform provider. In the text mode it also explains how to setup the CLI in environments with restricted internet access. - Use `DATABRICKS_TF_EXEC_PATH` env var to point Databricks CLI to the Terraform binary. The CLI only uses it if `DATABRICKS_TF_VERSION` matches the currently used terraform version. - Use `DATABRICKS_TF_CLI_CONFIG_FILE` env var to point Terraform CLI config that points to the filesystem mirror for the Databricks provider. The CLI only uses it if `DATABRICKS_TF_PROVIDER_VERSION` matches the currently used provider version. Relevant PR on the VSCode extension side: https://github.com/databricks/databricks-vscode/pull/1147 Example output of the `databricks bundle debug terraform`: ``` Terraform version: 1.5.5 Terraform URL: https://releases.hashicorp.com/terraform/1.5.5 Databricks Terraform Provider version: 1.38.0 Databricks Terraform Provider URL: https://github.com/databricks/terraform-provider-databricks/releases/tag/v1.38.0 Databricks CLI downloads its Terraform dependencies automatically. If you run the CLI in an air-gapped environment, you can download the dependencies manually and set these environment variables: DATABRICKS_TF_VERSION=1.5.5 DATABRICKS_TF_EXEC_PATH=/path/to/terraform/binary DATABRICKS_TF_PROVIDER_VERSION=1.38.0 DATABRICKS_TF_CLI_CONFIG_FILE=/path/to/terraform/cli/config.tfrc Here is an example *.tfrc configuration file: disable_checkpoint = true provider_installation { filesystem_mirror { path = "/path/to/a/folder/with/databricks/terraform/provider" } } The filesystem mirror path should point to the folder with the Databricks Terraform Provider. The folder should have this structure: /registry.terraform.io/databricks/databricks/terraform-provider-databricks_1.38.0_ARCH.zip For more information about filesystem mirrors, see the Terraform documentation: https://developer.hashicorp.com/terraform/cli/config/config-file#filesystem_mirror ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2024-04-02 12:56:27 +00:00
Version: TerraformVersion,
InstallDir: binDir,
Timeout: 1 * time.Minute,
}
execPath, err = installer.Install(ctx)
if err != nil {
return "", fmt.Errorf("error downloading Terraform: %w", err)
}
tf.ExecPath = execPath
log.Debugf(ctx, "Using Terraform at %s", tf.ExecPath)
return tf.ExecPath, nil
}
// This function inherits some environment variables for Terraform CLI.
func inheritEnvVars(ctx context.Context, environ map[string]string) error {
// Include $HOME in set of environment variables to pass along.
home, ok := env.Lookup(ctx, "HOME")
if ok {
environ["HOME"] = home
}
// Include $USERPROFILE in set of environment variables to pass along.
// This variable is used by Azure CLI on Windows to find stored credentials and metadata
userProfile, ok := env.Lookup(ctx, "USERPROFILE")
if ok {
environ["USERPROFILE"] = userProfile
}
// Include $PATH in set of environment variables to pass along.
// This is necessary to ensure that our Terraform provider can use the
// same auxiliary programs (e.g. `az`, or `gcloud`) as the CLI.
path, ok := env.Lookup(ctx, "PATH")
if ok {
environ["PATH"] = path
}
// Include $TF_CLI_CONFIG_FILE to override terraform provider in development.
Add `bundle debug terraform` command (#1294) - Add `bundle debug terraform` command. It prints versions of the Terraform and the Databricks Terraform provider. In the text mode it also explains how to setup the CLI in environments with restricted internet access. - Use `DATABRICKS_TF_EXEC_PATH` env var to point Databricks CLI to the Terraform binary. The CLI only uses it if `DATABRICKS_TF_VERSION` matches the currently used terraform version. - Use `DATABRICKS_TF_CLI_CONFIG_FILE` env var to point Terraform CLI config that points to the filesystem mirror for the Databricks provider. The CLI only uses it if `DATABRICKS_TF_PROVIDER_VERSION` matches the currently used provider version. Relevant PR on the VSCode extension side: https://github.com/databricks/databricks-vscode/pull/1147 Example output of the `databricks bundle debug terraform`: ``` Terraform version: 1.5.5 Terraform URL: https://releases.hashicorp.com/terraform/1.5.5 Databricks Terraform Provider version: 1.38.0 Databricks Terraform Provider URL: https://github.com/databricks/terraform-provider-databricks/releases/tag/v1.38.0 Databricks CLI downloads its Terraform dependencies automatically. If you run the CLI in an air-gapped environment, you can download the dependencies manually and set these environment variables: DATABRICKS_TF_VERSION=1.5.5 DATABRICKS_TF_EXEC_PATH=/path/to/terraform/binary DATABRICKS_TF_PROVIDER_VERSION=1.38.0 DATABRICKS_TF_CLI_CONFIG_FILE=/path/to/terraform/cli/config.tfrc Here is an example *.tfrc configuration file: disable_checkpoint = true provider_installation { filesystem_mirror { path = "/path/to/a/folder/with/databricks/terraform/provider" } } The filesystem mirror path should point to the folder with the Databricks Terraform Provider. The folder should have this structure: /registry.terraform.io/databricks/databricks/terraform-provider-databricks_1.38.0_ARCH.zip For more information about filesystem mirrors, see the Terraform documentation: https://developer.hashicorp.com/terraform/cli/config/config-file#filesystem_mirror ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2024-04-02 12:56:27 +00:00
// See: https://developer.hashicorp.com/terraform/cli/config/config-file#explicit-installation-method-configuration
devConfigFile, ok := env.Lookup(ctx, "TF_CLI_CONFIG_FILE")
if ok {
Add `bundle debug terraform` command (#1294) - Add `bundle debug terraform` command. It prints versions of the Terraform and the Databricks Terraform provider. In the text mode it also explains how to setup the CLI in environments with restricted internet access. - Use `DATABRICKS_TF_EXEC_PATH` env var to point Databricks CLI to the Terraform binary. The CLI only uses it if `DATABRICKS_TF_VERSION` matches the currently used terraform version. - Use `DATABRICKS_TF_CLI_CONFIG_FILE` env var to point Terraform CLI config that points to the filesystem mirror for the Databricks provider. The CLI only uses it if `DATABRICKS_TF_PROVIDER_VERSION` matches the currently used provider version. Relevant PR on the VSCode extension side: https://github.com/databricks/databricks-vscode/pull/1147 Example output of the `databricks bundle debug terraform`: ``` Terraform version: 1.5.5 Terraform URL: https://releases.hashicorp.com/terraform/1.5.5 Databricks Terraform Provider version: 1.38.0 Databricks Terraform Provider URL: https://github.com/databricks/terraform-provider-databricks/releases/tag/v1.38.0 Databricks CLI downloads its Terraform dependencies automatically. If you run the CLI in an air-gapped environment, you can download the dependencies manually and set these environment variables: DATABRICKS_TF_VERSION=1.5.5 DATABRICKS_TF_EXEC_PATH=/path/to/terraform/binary DATABRICKS_TF_PROVIDER_VERSION=1.38.0 DATABRICKS_TF_CLI_CONFIG_FILE=/path/to/terraform/cli/config.tfrc Here is an example *.tfrc configuration file: disable_checkpoint = true provider_installation { filesystem_mirror { path = "/path/to/a/folder/with/databricks/terraform/provider" } } The filesystem mirror path should point to the folder with the Databricks Terraform Provider. The folder should have this structure: /registry.terraform.io/databricks/databricks/terraform-provider-databricks_1.38.0_ARCH.zip For more information about filesystem mirrors, see the Terraform documentation: https://developer.hashicorp.com/terraform/cli/config/config-file#filesystem_mirror ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2024-04-02 12:56:27 +00:00
environ["TF_CLI_CONFIG_FILE"] = devConfigFile
}
// Map $DATABRICKS_TF_CLI_CONFIG_FILE to $TF_CLI_CONFIG_FILE
// VSCode extension provides a file with the "provider_installation.filesystem_mirror" configuration.
// We only use it if the provider version matches the currently used version,
// otherwise terraform will fail to download the right version (even with unrestricted internet access).
configFile, err := getEnvVarWithMatchingVersion(ctx, TerraformCliConfigPathEnv, TerraformProviderVersionEnv, schema.ProviderVersion)
if err != nil {
return err
}
if configFile != "" {
log.Debugf(ctx, "Using Terraform CLI config from %s at %s", TerraformCliConfigPathEnv, configFile)
environ["TF_CLI_CONFIG_FILE"] = configFile
}
return nil
}
Add `bundle debug terraform` command (#1294) - Add `bundle debug terraform` command. It prints versions of the Terraform and the Databricks Terraform provider. In the text mode it also explains how to setup the CLI in environments with restricted internet access. - Use `DATABRICKS_TF_EXEC_PATH` env var to point Databricks CLI to the Terraform binary. The CLI only uses it if `DATABRICKS_TF_VERSION` matches the currently used terraform version. - Use `DATABRICKS_TF_CLI_CONFIG_FILE` env var to point Terraform CLI config that points to the filesystem mirror for the Databricks provider. The CLI only uses it if `DATABRICKS_TF_PROVIDER_VERSION` matches the currently used provider version. Relevant PR on the VSCode extension side: https://github.com/databricks/databricks-vscode/pull/1147 Example output of the `databricks bundle debug terraform`: ``` Terraform version: 1.5.5 Terraform URL: https://releases.hashicorp.com/terraform/1.5.5 Databricks Terraform Provider version: 1.38.0 Databricks Terraform Provider URL: https://github.com/databricks/terraform-provider-databricks/releases/tag/v1.38.0 Databricks CLI downloads its Terraform dependencies automatically. If you run the CLI in an air-gapped environment, you can download the dependencies manually and set these environment variables: DATABRICKS_TF_VERSION=1.5.5 DATABRICKS_TF_EXEC_PATH=/path/to/terraform/binary DATABRICKS_TF_PROVIDER_VERSION=1.38.0 DATABRICKS_TF_CLI_CONFIG_FILE=/path/to/terraform/cli/config.tfrc Here is an example *.tfrc configuration file: disable_checkpoint = true provider_installation { filesystem_mirror { path = "/path/to/a/folder/with/databricks/terraform/provider" } } The filesystem mirror path should point to the folder with the Databricks Terraform Provider. The folder should have this structure: /registry.terraform.io/databricks/databricks/terraform-provider-databricks_1.38.0_ARCH.zip For more information about filesystem mirrors, see the Terraform documentation: https://developer.hashicorp.com/terraform/cli/config/config-file#filesystem_mirror ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2024-04-02 12:56:27 +00:00
// Example: this function will return a value of TF_EXEC_PATH only if the path exists and if TF_VERSION matches the TerraformVersion.
// This function is used for env vars set by the Databricks VSCode extension. The variables are intended to be used by the CLI
// bundled with the Databricks VSCode extension, but users can use different CLI versions in the VSCode terminals, in which case we want to ignore
// the variables if that CLI uses different versions of the dependencies.
func getEnvVarWithMatchingVersion(ctx context.Context, envVarName string, versionVarName string, currentVersion string) (string, error) {
envValue := env.Get(ctx, envVarName)
versionValue := env.Get(ctx, versionVarName)
Add docker images for the CLI (#1353) ## Changes This PR makes changes to support creating a docker image for the CLI with the `terraform` dependencies built in. This is useful for customers that operate in a network-restricted environment. Normally DABs makes API calls to registry.terraform.io to setup the terraform dependencies, with this setup the CLI/DABs will rely on the provider binaries bundled in the docker image. ### Specifically this PR makes the following changes: ---------------- Modifies the CLI release workflow to publish the docker images in the Github Container Registry. URL: https://github.com/databricks/cli/pkgs/container/cli. We use docker support in `goreleaser` to build and publish the images. Using goreleaser ensures the CLI packaged in the docker image is the same release artifact as the normal releases. For more information see: 1. https://goreleaser.com/cookbooks/multi-platform-docker-images 2. https://goreleaser.com/customization/docker/ Other choices made include: 1. Using `alpine` as the base image. The reason is `alpine` is a small and lightweight linux distribution (~5MB) and an industry standard. 2. Not using [docker manifest](https://docs.docker.com/reference/cli/docker/manifest) to create a multi-arch build. This is because the functionality is still experimental. ------------------ Make the `DATABRICKS_TF_VERSION` and `DATABRICKS_TF_PROVIDER_VERSION` environment variables optional for using the terraform file mirror. While it's not strictly necessary to make the docker image work, it's the "right" behaviour and reduces complexity. The rationale is: - These environment variables here are needed so the Databricks CLI does not accidentally use the file mirror bundled with VSCode if it's incompatible. This does not require the env vars to be mandatory. context: https://github.com/databricks/cli/pull/1294 - This makes the `Dockerfile` and `setup.sh` simpler. We don't need an [entrypoint.sh script to set the version environment variables](https://medium.com/@leonardo5621_66451/learn-how-to-use-entrypoint-scripts-in-docker-images-fede010f172d). This also makes using an interactive terminal with `docker run -it ...` work out of the box. ## Tests Tested manually. -------------------- To test the release pipeline I triggered a couple of dummy releases and verified that the images are built successfully and uploaded to Github. 1. https://github.com/databricks/cli/pkgs/container/cli 3. workflow for release: https://github.com/databricks/cli/actions/runs/8646106333 -------------------- I tested the docker container itself by setting up [Charles](https://www.charlesproxy.com/) as an HTTP proxy and verifying that no HTTP requests are made to `registry.terraform.io` Before: FYI, The Charles web proxy is hosted at localhost:8888. ``` shreyas.goenka@THW32HFW6T bundle-playground % rm -r .databricks shreyas.goenka@THW32HFW6T bundle-playground % HTTP_PROXY="http://localhost:8888" HTTPS_PROXY="http://localhost:8888" cli bundle deploy Uploading bundle files to /Users/shreyas.goenka@databricks.com/.bundle/bundle-playground/default/files... Deploying resources... Updating deployment state... Deployment complete! ``` <img width="1275" alt="Screenshot 2024-04-11 at 3 21 45 PM" src="https://github.com/databricks/cli/assets/88374338/15f37324-afbd-47c0-a40e-330ab232656b"> After: This time bundle deploy is run from inside the docker container. We use `host.docker.internal` to map to localhost on the host machine, and -v to mount the host file system as a volume. ``` shreyas.goenka@THW32HFW6T bundle-playground % docker run -v ~/projects/bundle-playground:/bundle -v ~/.databrickscfg:/root/.databrickscfg -it --entrypoint /bin/sh -e HTTP_PROXY="http://host.docker.internal:8888" -e HTTPS_PROXY="http://host.docker.internal:8888" --network host ghcr.io/databricks/cli:latest-arm64 / # cd /bundle/ /bundle # rm -r .databricks/ /bundle # databricks bundle deploy Uploading bundle files to /Users/shreyas.goenka@databricks.com/.bundle/bundle-playground/default/files... Deploying resources... Updating deployment state... Deployment complete! ``` <img width="1275" alt="Screenshot 2024-04-11 at 3 22 54 PM" src="https://github.com/databricks/cli/assets/88374338/2a8f097e-734b-4b3e-8075-c02e98a1b275">
2024-04-12 15:22:30 +00:00
// return early if the environment variable is not set
if envValue == "" {
log.Debugf(ctx, "%s is not defined", envVarName)
Add `bundle debug terraform` command (#1294) - Add `bundle debug terraform` command. It prints versions of the Terraform and the Databricks Terraform provider. In the text mode it also explains how to setup the CLI in environments with restricted internet access. - Use `DATABRICKS_TF_EXEC_PATH` env var to point Databricks CLI to the Terraform binary. The CLI only uses it if `DATABRICKS_TF_VERSION` matches the currently used terraform version. - Use `DATABRICKS_TF_CLI_CONFIG_FILE` env var to point Terraform CLI config that points to the filesystem mirror for the Databricks provider. The CLI only uses it if `DATABRICKS_TF_PROVIDER_VERSION` matches the currently used provider version. Relevant PR on the VSCode extension side: https://github.com/databricks/databricks-vscode/pull/1147 Example output of the `databricks bundle debug terraform`: ``` Terraform version: 1.5.5 Terraform URL: https://releases.hashicorp.com/terraform/1.5.5 Databricks Terraform Provider version: 1.38.0 Databricks Terraform Provider URL: https://github.com/databricks/terraform-provider-databricks/releases/tag/v1.38.0 Databricks CLI downloads its Terraform dependencies automatically. If you run the CLI in an air-gapped environment, you can download the dependencies manually and set these environment variables: DATABRICKS_TF_VERSION=1.5.5 DATABRICKS_TF_EXEC_PATH=/path/to/terraform/binary DATABRICKS_TF_PROVIDER_VERSION=1.38.0 DATABRICKS_TF_CLI_CONFIG_FILE=/path/to/terraform/cli/config.tfrc Here is an example *.tfrc configuration file: disable_checkpoint = true provider_installation { filesystem_mirror { path = "/path/to/a/folder/with/databricks/terraform/provider" } } The filesystem mirror path should point to the folder with the Databricks Terraform Provider. The folder should have this structure: /registry.terraform.io/databricks/databricks/terraform-provider-databricks_1.38.0_ARCH.zip For more information about filesystem mirrors, see the Terraform documentation: https://developer.hashicorp.com/terraform/cli/config/config-file#filesystem_mirror ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2024-04-02 12:56:27 +00:00
return "", nil
}
Add docker images for the CLI (#1353) ## Changes This PR makes changes to support creating a docker image for the CLI with the `terraform` dependencies built in. This is useful for customers that operate in a network-restricted environment. Normally DABs makes API calls to registry.terraform.io to setup the terraform dependencies, with this setup the CLI/DABs will rely on the provider binaries bundled in the docker image. ### Specifically this PR makes the following changes: ---------------- Modifies the CLI release workflow to publish the docker images in the Github Container Registry. URL: https://github.com/databricks/cli/pkgs/container/cli. We use docker support in `goreleaser` to build and publish the images. Using goreleaser ensures the CLI packaged in the docker image is the same release artifact as the normal releases. For more information see: 1. https://goreleaser.com/cookbooks/multi-platform-docker-images 2. https://goreleaser.com/customization/docker/ Other choices made include: 1. Using `alpine` as the base image. The reason is `alpine` is a small and lightweight linux distribution (~5MB) and an industry standard. 2. Not using [docker manifest](https://docs.docker.com/reference/cli/docker/manifest) to create a multi-arch build. This is because the functionality is still experimental. ------------------ Make the `DATABRICKS_TF_VERSION` and `DATABRICKS_TF_PROVIDER_VERSION` environment variables optional for using the terraform file mirror. While it's not strictly necessary to make the docker image work, it's the "right" behaviour and reduces complexity. The rationale is: - These environment variables here are needed so the Databricks CLI does not accidentally use the file mirror bundled with VSCode if it's incompatible. This does not require the env vars to be mandatory. context: https://github.com/databricks/cli/pull/1294 - This makes the `Dockerfile` and `setup.sh` simpler. We don't need an [entrypoint.sh script to set the version environment variables](https://medium.com/@leonardo5621_66451/learn-how-to-use-entrypoint-scripts-in-docker-images-fede010f172d). This also makes using an interactive terminal with `docker run -it ...` work out of the box. ## Tests Tested manually. -------------------- To test the release pipeline I triggered a couple of dummy releases and verified that the images are built successfully and uploaded to Github. 1. https://github.com/databricks/cli/pkgs/container/cli 3. workflow for release: https://github.com/databricks/cli/actions/runs/8646106333 -------------------- I tested the docker container itself by setting up [Charles](https://www.charlesproxy.com/) as an HTTP proxy and verifying that no HTTP requests are made to `registry.terraform.io` Before: FYI, The Charles web proxy is hosted at localhost:8888. ``` shreyas.goenka@THW32HFW6T bundle-playground % rm -r .databricks shreyas.goenka@THW32HFW6T bundle-playground % HTTP_PROXY="http://localhost:8888" HTTPS_PROXY="http://localhost:8888" cli bundle deploy Uploading bundle files to /Users/shreyas.goenka@databricks.com/.bundle/bundle-playground/default/files... Deploying resources... Updating deployment state... Deployment complete! ``` <img width="1275" alt="Screenshot 2024-04-11 at 3 21 45 PM" src="https://github.com/databricks/cli/assets/88374338/15f37324-afbd-47c0-a40e-330ab232656b"> After: This time bundle deploy is run from inside the docker container. We use `host.docker.internal` to map to localhost on the host machine, and -v to mount the host file system as a volume. ``` shreyas.goenka@THW32HFW6T bundle-playground % docker run -v ~/projects/bundle-playground:/bundle -v ~/.databrickscfg:/root/.databrickscfg -it --entrypoint /bin/sh -e HTTP_PROXY="http://host.docker.internal:8888" -e HTTPS_PROXY="http://host.docker.internal:8888" --network host ghcr.io/databricks/cli:latest-arm64 / # cd /bundle/ /bundle # rm -r .databricks/ /bundle # databricks bundle deploy Uploading bundle files to /Users/shreyas.goenka@databricks.com/.bundle/bundle-playground/default/files... Deploying resources... Updating deployment state... Deployment complete! ``` <img width="1275" alt="Screenshot 2024-04-11 at 3 22 54 PM" src="https://github.com/databricks/cli/assets/88374338/2a8f097e-734b-4b3e-8075-c02e98a1b275">
2024-04-12 15:22:30 +00:00
// If the path does not exist, we return early.
Add `bundle debug terraform` command (#1294) - Add `bundle debug terraform` command. It prints versions of the Terraform and the Databricks Terraform provider. In the text mode it also explains how to setup the CLI in environments with restricted internet access. - Use `DATABRICKS_TF_EXEC_PATH` env var to point Databricks CLI to the Terraform binary. The CLI only uses it if `DATABRICKS_TF_VERSION` matches the currently used terraform version. - Use `DATABRICKS_TF_CLI_CONFIG_FILE` env var to point Terraform CLI config that points to the filesystem mirror for the Databricks provider. The CLI only uses it if `DATABRICKS_TF_PROVIDER_VERSION` matches the currently used provider version. Relevant PR on the VSCode extension side: https://github.com/databricks/databricks-vscode/pull/1147 Example output of the `databricks bundle debug terraform`: ``` Terraform version: 1.5.5 Terraform URL: https://releases.hashicorp.com/terraform/1.5.5 Databricks Terraform Provider version: 1.38.0 Databricks Terraform Provider URL: https://github.com/databricks/terraform-provider-databricks/releases/tag/v1.38.0 Databricks CLI downloads its Terraform dependencies automatically. If you run the CLI in an air-gapped environment, you can download the dependencies manually and set these environment variables: DATABRICKS_TF_VERSION=1.5.5 DATABRICKS_TF_EXEC_PATH=/path/to/terraform/binary DATABRICKS_TF_PROVIDER_VERSION=1.38.0 DATABRICKS_TF_CLI_CONFIG_FILE=/path/to/terraform/cli/config.tfrc Here is an example *.tfrc configuration file: disable_checkpoint = true provider_installation { filesystem_mirror { path = "/path/to/a/folder/with/databricks/terraform/provider" } } The filesystem mirror path should point to the folder with the Databricks Terraform Provider. The folder should have this structure: /registry.terraform.io/databricks/databricks/terraform-provider-databricks_1.38.0_ARCH.zip For more information about filesystem mirrors, see the Terraform documentation: https://developer.hashicorp.com/terraform/cli/config/config-file#filesystem_mirror ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2024-04-02 12:56:27 +00:00
_, err := os.Stat(envValue)
if err != nil {
if errors.Is(err, fs.ErrNotExist) {
Add docker images for the CLI (#1353) ## Changes This PR makes changes to support creating a docker image for the CLI with the `terraform` dependencies built in. This is useful for customers that operate in a network-restricted environment. Normally DABs makes API calls to registry.terraform.io to setup the terraform dependencies, with this setup the CLI/DABs will rely on the provider binaries bundled in the docker image. ### Specifically this PR makes the following changes: ---------------- Modifies the CLI release workflow to publish the docker images in the Github Container Registry. URL: https://github.com/databricks/cli/pkgs/container/cli. We use docker support in `goreleaser` to build and publish the images. Using goreleaser ensures the CLI packaged in the docker image is the same release artifact as the normal releases. For more information see: 1. https://goreleaser.com/cookbooks/multi-platform-docker-images 2. https://goreleaser.com/customization/docker/ Other choices made include: 1. Using `alpine` as the base image. The reason is `alpine` is a small and lightweight linux distribution (~5MB) and an industry standard. 2. Not using [docker manifest](https://docs.docker.com/reference/cli/docker/manifest) to create a multi-arch build. This is because the functionality is still experimental. ------------------ Make the `DATABRICKS_TF_VERSION` and `DATABRICKS_TF_PROVIDER_VERSION` environment variables optional for using the terraform file mirror. While it's not strictly necessary to make the docker image work, it's the "right" behaviour and reduces complexity. The rationale is: - These environment variables here are needed so the Databricks CLI does not accidentally use the file mirror bundled with VSCode if it's incompatible. This does not require the env vars to be mandatory. context: https://github.com/databricks/cli/pull/1294 - This makes the `Dockerfile` and `setup.sh` simpler. We don't need an [entrypoint.sh script to set the version environment variables](https://medium.com/@leonardo5621_66451/learn-how-to-use-entrypoint-scripts-in-docker-images-fede010f172d). This also makes using an interactive terminal with `docker run -it ...` work out of the box. ## Tests Tested manually. -------------------- To test the release pipeline I triggered a couple of dummy releases and verified that the images are built successfully and uploaded to Github. 1. https://github.com/databricks/cli/pkgs/container/cli 3. workflow for release: https://github.com/databricks/cli/actions/runs/8646106333 -------------------- I tested the docker container itself by setting up [Charles](https://www.charlesproxy.com/) as an HTTP proxy and verifying that no HTTP requests are made to `registry.terraform.io` Before: FYI, The Charles web proxy is hosted at localhost:8888. ``` shreyas.goenka@THW32HFW6T bundle-playground % rm -r .databricks shreyas.goenka@THW32HFW6T bundle-playground % HTTP_PROXY="http://localhost:8888" HTTPS_PROXY="http://localhost:8888" cli bundle deploy Uploading bundle files to /Users/shreyas.goenka@databricks.com/.bundle/bundle-playground/default/files... Deploying resources... Updating deployment state... Deployment complete! ``` <img width="1275" alt="Screenshot 2024-04-11 at 3 21 45 PM" src="https://github.com/databricks/cli/assets/88374338/15f37324-afbd-47c0-a40e-330ab232656b"> After: This time bundle deploy is run from inside the docker container. We use `host.docker.internal` to map to localhost on the host machine, and -v to mount the host file system as a volume. ``` shreyas.goenka@THW32HFW6T bundle-playground % docker run -v ~/projects/bundle-playground:/bundle -v ~/.databrickscfg:/root/.databrickscfg -it --entrypoint /bin/sh -e HTTP_PROXY="http://host.docker.internal:8888" -e HTTPS_PROXY="http://host.docker.internal:8888" --network host ghcr.io/databricks/cli:latest-arm64 / # cd /bundle/ /bundle # rm -r .databricks/ /bundle # databricks bundle deploy Uploading bundle files to /Users/shreyas.goenka@databricks.com/.bundle/bundle-playground/default/files... Deploying resources... Updating deployment state... Deployment complete! ``` <img width="1275" alt="Screenshot 2024-04-11 at 3 22 54 PM" src="https://github.com/databricks/cli/assets/88374338/2a8f097e-734b-4b3e-8075-c02e98a1b275">
2024-04-12 15:22:30 +00:00
log.Debugf(ctx, "%s at %s does not exist", envVarName, envValue)
Add `bundle debug terraform` command (#1294) - Add `bundle debug terraform` command. It prints versions of the Terraform and the Databricks Terraform provider. In the text mode it also explains how to setup the CLI in environments with restricted internet access. - Use `DATABRICKS_TF_EXEC_PATH` env var to point Databricks CLI to the Terraform binary. The CLI only uses it if `DATABRICKS_TF_VERSION` matches the currently used terraform version. - Use `DATABRICKS_TF_CLI_CONFIG_FILE` env var to point Terraform CLI config that points to the filesystem mirror for the Databricks provider. The CLI only uses it if `DATABRICKS_TF_PROVIDER_VERSION` matches the currently used provider version. Relevant PR on the VSCode extension side: https://github.com/databricks/databricks-vscode/pull/1147 Example output of the `databricks bundle debug terraform`: ``` Terraform version: 1.5.5 Terraform URL: https://releases.hashicorp.com/terraform/1.5.5 Databricks Terraform Provider version: 1.38.0 Databricks Terraform Provider URL: https://github.com/databricks/terraform-provider-databricks/releases/tag/v1.38.0 Databricks CLI downloads its Terraform dependencies automatically. If you run the CLI in an air-gapped environment, you can download the dependencies manually and set these environment variables: DATABRICKS_TF_VERSION=1.5.5 DATABRICKS_TF_EXEC_PATH=/path/to/terraform/binary DATABRICKS_TF_PROVIDER_VERSION=1.38.0 DATABRICKS_TF_CLI_CONFIG_FILE=/path/to/terraform/cli/config.tfrc Here is an example *.tfrc configuration file: disable_checkpoint = true provider_installation { filesystem_mirror { path = "/path/to/a/folder/with/databricks/terraform/provider" } } The filesystem mirror path should point to the folder with the Databricks Terraform Provider. The folder should have this structure: /registry.terraform.io/databricks/databricks/terraform-provider-databricks_1.38.0_ARCH.zip For more information about filesystem mirrors, see the Terraform documentation: https://developer.hashicorp.com/terraform/cli/config/config-file#filesystem_mirror ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2024-04-02 12:56:27 +00:00
return "", nil
} else {
return "", err
}
}
Add docker images for the CLI (#1353) ## Changes This PR makes changes to support creating a docker image for the CLI with the `terraform` dependencies built in. This is useful for customers that operate in a network-restricted environment. Normally DABs makes API calls to registry.terraform.io to setup the terraform dependencies, with this setup the CLI/DABs will rely on the provider binaries bundled in the docker image. ### Specifically this PR makes the following changes: ---------------- Modifies the CLI release workflow to publish the docker images in the Github Container Registry. URL: https://github.com/databricks/cli/pkgs/container/cli. We use docker support in `goreleaser` to build and publish the images. Using goreleaser ensures the CLI packaged in the docker image is the same release artifact as the normal releases. For more information see: 1. https://goreleaser.com/cookbooks/multi-platform-docker-images 2. https://goreleaser.com/customization/docker/ Other choices made include: 1. Using `alpine` as the base image. The reason is `alpine` is a small and lightweight linux distribution (~5MB) and an industry standard. 2. Not using [docker manifest](https://docs.docker.com/reference/cli/docker/manifest) to create a multi-arch build. This is because the functionality is still experimental. ------------------ Make the `DATABRICKS_TF_VERSION` and `DATABRICKS_TF_PROVIDER_VERSION` environment variables optional for using the terraform file mirror. While it's not strictly necessary to make the docker image work, it's the "right" behaviour and reduces complexity. The rationale is: - These environment variables here are needed so the Databricks CLI does not accidentally use the file mirror bundled with VSCode if it's incompatible. This does not require the env vars to be mandatory. context: https://github.com/databricks/cli/pull/1294 - This makes the `Dockerfile` and `setup.sh` simpler. We don't need an [entrypoint.sh script to set the version environment variables](https://medium.com/@leonardo5621_66451/learn-how-to-use-entrypoint-scripts-in-docker-images-fede010f172d). This also makes using an interactive terminal with `docker run -it ...` work out of the box. ## Tests Tested manually. -------------------- To test the release pipeline I triggered a couple of dummy releases and verified that the images are built successfully and uploaded to Github. 1. https://github.com/databricks/cli/pkgs/container/cli 3. workflow for release: https://github.com/databricks/cli/actions/runs/8646106333 -------------------- I tested the docker container itself by setting up [Charles](https://www.charlesproxy.com/) as an HTTP proxy and verifying that no HTTP requests are made to `registry.terraform.io` Before: FYI, The Charles web proxy is hosted at localhost:8888. ``` shreyas.goenka@THW32HFW6T bundle-playground % rm -r .databricks shreyas.goenka@THW32HFW6T bundle-playground % HTTP_PROXY="http://localhost:8888" HTTPS_PROXY="http://localhost:8888" cli bundle deploy Uploading bundle files to /Users/shreyas.goenka@databricks.com/.bundle/bundle-playground/default/files... Deploying resources... Updating deployment state... Deployment complete! ``` <img width="1275" alt="Screenshot 2024-04-11 at 3 21 45 PM" src="https://github.com/databricks/cli/assets/88374338/15f37324-afbd-47c0-a40e-330ab232656b"> After: This time bundle deploy is run from inside the docker container. We use `host.docker.internal` to map to localhost on the host machine, and -v to mount the host file system as a volume. ``` shreyas.goenka@THW32HFW6T bundle-playground % docker run -v ~/projects/bundle-playground:/bundle -v ~/.databrickscfg:/root/.databrickscfg -it --entrypoint /bin/sh -e HTTP_PROXY="http://host.docker.internal:8888" -e HTTPS_PROXY="http://host.docker.internal:8888" --network host ghcr.io/databricks/cli:latest-arm64 / # cd /bundle/ /bundle # rm -r .databricks/ /bundle # databricks bundle deploy Uploading bundle files to /Users/shreyas.goenka@databricks.com/.bundle/bundle-playground/default/files... Deploying resources... Updating deployment state... Deployment complete! ``` <img width="1275" alt="Screenshot 2024-04-11 at 3 22 54 PM" src="https://github.com/databricks/cli/assets/88374338/2a8f097e-734b-4b3e-8075-c02e98a1b275">
2024-04-12 15:22:30 +00:00
// If the version environment variable is not set, we directly return the value of the environment variable.
if versionValue == "" {
return envValue, nil
}
// When the version environment variable is set, we check if it matches the current version.
// If it does not match, we return an empty string.
if versionValue != currentVersion {
log.Debugf(ctx, "%s as %s does not match the current version %s, ignoring %s", versionVarName, versionValue, currentVersion, envVarName)
return "", nil
}
Add `bundle debug terraform` command (#1294) - Add `bundle debug terraform` command. It prints versions of the Terraform and the Databricks Terraform provider. In the text mode it also explains how to setup the CLI in environments with restricted internet access. - Use `DATABRICKS_TF_EXEC_PATH` env var to point Databricks CLI to the Terraform binary. The CLI only uses it if `DATABRICKS_TF_VERSION` matches the currently used terraform version. - Use `DATABRICKS_TF_CLI_CONFIG_FILE` env var to point Terraform CLI config that points to the filesystem mirror for the Databricks provider. The CLI only uses it if `DATABRICKS_TF_PROVIDER_VERSION` matches the currently used provider version. Relevant PR on the VSCode extension side: https://github.com/databricks/databricks-vscode/pull/1147 Example output of the `databricks bundle debug terraform`: ``` Terraform version: 1.5.5 Terraform URL: https://releases.hashicorp.com/terraform/1.5.5 Databricks Terraform Provider version: 1.38.0 Databricks Terraform Provider URL: https://github.com/databricks/terraform-provider-databricks/releases/tag/v1.38.0 Databricks CLI downloads its Terraform dependencies automatically. If you run the CLI in an air-gapped environment, you can download the dependencies manually and set these environment variables: DATABRICKS_TF_VERSION=1.5.5 DATABRICKS_TF_EXEC_PATH=/path/to/terraform/binary DATABRICKS_TF_PROVIDER_VERSION=1.38.0 DATABRICKS_TF_CLI_CONFIG_FILE=/path/to/terraform/cli/config.tfrc Here is an example *.tfrc configuration file: disable_checkpoint = true provider_installation { filesystem_mirror { path = "/path/to/a/folder/with/databricks/terraform/provider" } } The filesystem mirror path should point to the folder with the Databricks Terraform Provider. The folder should have this structure: /registry.terraform.io/databricks/databricks/terraform-provider-databricks_1.38.0_ARCH.zip For more information about filesystem mirrors, see the Terraform documentation: https://developer.hashicorp.com/terraform/cli/config/config-file#filesystem_mirror ``` --------- Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
2024-04-02 12:56:27 +00:00
return envValue, nil
}
// This function sets temp dir location for terraform to use. If user does not
// specify anything here, we fall back to a `tmp` directory in the bundle's cache
// directory
//
// This is necessary to avoid trying to create temporary files in directories
// the CLI and its dependencies do not have access to.
//
// see: os.TempDir for more context
func setTempDirEnvVars(ctx context.Context, environ map[string]string, b *bundle.Bundle) error {
switch runtime.GOOS {
case "windows":
if v, ok := env.Lookup(ctx, "TMP"); ok {
environ["TMP"] = v
} else if v, ok := env.Lookup(ctx, "TEMP"); ok {
environ["TEMP"] = v
} else {
tmpDir, err := b.CacheDir(ctx, "tmp")
if err != nil {
return err
}
environ["TMP"] = tmpDir
}
default:
// If TMPDIR is not set, we let the process fall back to its default value.
if v, ok := env.Lookup(ctx, "TMPDIR"); ok {
environ["TMPDIR"] = v
}
}
return nil
}
// This function passes through all proxy related environment variables.
func setProxyEnvVars(ctx context.Context, environ map[string]string, b *bundle.Bundle) error {
for _, v := range []string{"http_proxy", "https_proxy", "no_proxy"} {
// The case (upper or lower) is notoriously inconsistent for tools on Unix systems.
// We therefore try to read both the upper and lower case versions of the variable.
for _, v := range []string{strings.ToUpper(v), strings.ToLower(v)} {
if val, ok := env.Lookup(ctx, v); ok {
// Only set uppercase version of the variable.
environ[strings.ToUpper(v)] = val
}
}
}
return nil
}
func setUserAgentExtraEnvVar(environ map[string]string, b *bundle.Bundle) error {
// Add "cli" to the user agent in set by the Databricks Terraform provider.
// This will allow us to attribute downstream requests made by the Databricks
// Terraform provider to the CLI.
products := []string{fmt.Sprintf("cli/%s", build.GetInfo().Version)}
if experimental := b.Config.Experimental; experimental != nil {
if experimental.PyDABs.Enabled {
products = append(products, "databricks-pydabs/0.0.0")
}
}
userAgentExtra := strings.Join(products, " ")
if userAgentExtra != "" {
environ["DATABRICKS_USER_AGENT_EXTRA"] = userAgentExtra
}
return nil
}
func (m *initialize) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
tfConfig := b.Config.Bundle.Terraform
if tfConfig == nil {
tfConfig = &config.Terraform{}
b.Config.Bundle.Terraform = tfConfig
}
execPath, err := m.findExecPath(ctx, b, tfConfig)
2022-12-15 14:12:47 +00:00
if err != nil {
return diag.FromErr(err)
2022-12-15 14:12:47 +00:00
}
workingDir, err := Dir(ctx, b)
2022-12-15 14:12:47 +00:00
if err != nil {
return diag.FromErr(err)
2022-12-15 14:12:47 +00:00
}
tf, err := tfexec.NewTerraform(workingDir, execPath)
if err != nil {
return diag.FromErr(err)
2022-12-15 14:12:47 +00:00
}
environ, err := b.AuthEnv()
if err != nil {
return diag.FromErr(err)
}
err = inheritEnvVars(ctx, environ)
if err != nil {
return diag.FromErr(err)
}
// Set the temporary directory environment variables
err = setTempDirEnvVars(ctx, environ, b)
if err != nil {
return diag.FromErr(err)
}
// Set the proxy related environment variables
err = setProxyEnvVars(ctx, environ, b)
if err != nil {
return diag.FromErr(err)
}
err = setUserAgentExtraEnvVar(environ, b)
if err != nil {
return diag.FromErr(err)
}
// Configure environment variables for auth for Terraform to use.
log.Debugf(ctx, "Environment variables for Terraform: %s", strings.Join(maps.Keys(environ), ", "))
err = tf.SetEnv(environ)
if err != nil {
return diag.FromErr(err)
}
2022-12-15 14:12:47 +00:00
b.Terraform = tf
return nil
2022-12-15 14:12:47 +00:00
}
func Initialize() bundle.Mutator {
return &initialize{}
}