mirror of https://github.com/databricks/cli.git
Enable offline install of labs projects (#2049)
## Changes <!-- Summary of your changes that are easy to understand --> This PR changes the labs code base to allow offline installation of labs projects (like UCX). When the flag --offline=true is passed, the code skips checking for project versions and downloading code from GitHub, and instead looks in the local installation folder. This command is useful on systems with restricted internet access. The user should follow these steps: - install a labs project on a machine which has internet access - zip and copy the installation to the intended machine and - run databricks labs install <project name> --offline=true. The command will look for the code in the same install directory and, if present, load it from there. Closes #1646, related to https://github.com/databrickslabs/ucx/issues/3418 ## Tests <!-- How is this tested? --> Added unit test case and tested. NO_CHANGELOG=true --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com> Co-authored-by: Lennart Kats (databricks) <lennart.kats@databricks.com> Co-authored-by: Denis Bilenko <denis.bilenko@databricks.com> Co-authored-by: Julia Crawford (Databricks) <julia.crawford@databricks.com> Co-authored-by: Ilya Kuznetsov <ilya.kuznetsov@databricks.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Andrew Nester <andrew.nester@databricks.com> Co-authored-by: Anton Nekipelov <226657+anton-107@users.noreply.github.com> Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
This commit is contained in:
parent
b5a7023ef1
commit
fa0a734b3c
|
@ -13,25 +13,31 @@ const cacheTTL = 1 * time.Hour
|
||||||
|
|
||||||
// NewReleaseCache creates a release cache for a repository in the GitHub org.
|
// NewReleaseCache creates a release cache for a repository in the GitHub org.
|
||||||
// Caller has to provide different cache directories for different repositories.
|
// Caller has to provide different cache directories for different repositories.
|
||||||
func NewReleaseCache(org, repo, cacheDir string) *ReleaseCache {
|
func NewReleaseCache(org, repo, cacheDir string, offlineInstall bool) *ReleaseCache {
|
||||||
pattern := fmt.Sprintf("%s-%s-releases", org, repo)
|
pattern := fmt.Sprintf("%s-%s-releases", org, repo)
|
||||||
return &ReleaseCache{
|
return &ReleaseCache{
|
||||||
cache: localcache.NewLocalCache[Versions](cacheDir, pattern, cacheTTL),
|
cache: localcache.NewLocalCache[Versions](cacheDir, pattern, cacheTTL),
|
||||||
Org: org,
|
Org: org,
|
||||||
Repo: repo,
|
Repo: repo,
|
||||||
|
Offline: offlineInstall,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type ReleaseCache struct {
|
type ReleaseCache struct {
|
||||||
cache localcache.LocalCache[Versions]
|
cache localcache.LocalCache[Versions]
|
||||||
Org string
|
Org string
|
||||||
Repo string
|
Repo string
|
||||||
|
Offline bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *ReleaseCache) Load(ctx context.Context) (Versions, error) {
|
func (r *ReleaseCache) Load(ctx context.Context) (Versions, error) {
|
||||||
return r.cache.Load(ctx, func() (Versions, error) {
|
if !r.Offline {
|
||||||
return getVersions(ctx, r.Org, r.Repo)
|
return r.cache.Load(ctx, func() (Versions, error) {
|
||||||
})
|
return getVersions(ctx, r.Org, r.Repo)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
cached, err := r.cache.LoadCache()
|
||||||
|
return cached.Data, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// getVersions is considered to be a private API, as we want the usage to go through a cache
|
// getVersions is considered to be a private API, as we want the usage to go through a cache
|
||||||
|
|
|
@ -26,7 +26,7 @@ func TestLoadsReleasesForCLI(t *testing.T) {
|
||||||
ctx := context.Background()
|
ctx := context.Background()
|
||||||
ctx = WithApiOverride(ctx, server.URL)
|
ctx = WithApiOverride(ctx, server.URL)
|
||||||
|
|
||||||
r := NewReleaseCache("databricks", "cli", t.TempDir())
|
r := NewReleaseCache("databricks", "cli", t.TempDir(), false)
|
||||||
all, err := r.Load(ctx)
|
all, err := r.Load(ctx)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Len(t, all, 2)
|
assert.Len(t, all, 2)
|
||||||
|
|
|
@ -7,16 +7,20 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
func newInstallCommand() *cobra.Command {
|
func newInstallCommand() *cobra.Command {
|
||||||
return &cobra.Command{
|
cmd := &cobra.Command{}
|
||||||
Use: "install NAME",
|
var offlineInstall bool
|
||||||
Args: root.ExactArgs(1),
|
|
||||||
Short: "Installs project",
|
cmd.Flags().BoolVar(&offlineInstall, "offline", offlineInstall, `If installing in offline mode, set this flag to true.`)
|
||||||
RunE: func(cmd *cobra.Command, args []string) error {
|
|
||||||
inst, err := project.NewInstaller(cmd, args[0])
|
cmd.Use = "install NAME"
|
||||||
if err != nil {
|
cmd.Args = root.ExactArgs(1)
|
||||||
return err
|
cmd.Short = "Installs project"
|
||||||
}
|
cmd.RunE = func(cmd *cobra.Command, args []string) error {
|
||||||
return inst.Install(cmd.Context())
|
inst, err := project.NewInstaller(cmd, args[0], offlineInstall)
|
||||||
},
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return inst.Install(cmd.Context())
|
||||||
}
|
}
|
||||||
|
return cmd
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,7 +35,7 @@ type LocalCache[T any] struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *LocalCache[T]) Load(ctx context.Context, refresh func() (T, error)) (T, error) {
|
func (r *LocalCache[T]) Load(ctx context.Context, refresh func() (T, error)) (T, error) {
|
||||||
cached, err := r.loadCache()
|
cached, err := r.LoadCache()
|
||||||
if errors.Is(err, fs.ErrNotExist) {
|
if errors.Is(err, fs.ErrNotExist) {
|
||||||
return r.refreshCache(ctx, refresh, r.zero)
|
return r.refreshCache(ctx, refresh, r.zero)
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
|
@ -96,7 +96,7 @@ func (r *LocalCache[T]) FileName() string {
|
||||||
return filepath.Join(r.dir, r.name+".json")
|
return filepath.Join(r.dir, r.name+".json")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *LocalCache[T]) loadCache() (*cached[T], error) {
|
func (r *LocalCache[T]) LoadCache() (*cached[T], error) {
|
||||||
jsonFile := r.FileName()
|
jsonFile := r.FileName()
|
||||||
raw, err := os.ReadFile(r.FileName())
|
raw, err := os.ReadFile(r.FileName())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -54,7 +54,7 @@ func (d *devInstallation) Install(ctx context.Context) error {
|
||||||
return d.Installer.runHook(d.Command)
|
return d.Installer.runHook(d.Command)
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewInstaller(cmd *cobra.Command, name string) (installable, error) {
|
func NewInstaller(cmd *cobra.Command, name string, offlineInstall bool) (installable, error) {
|
||||||
if name == "." {
|
if name == "." {
|
||||||
wd, err := os.Getwd()
|
wd, err := os.Getwd()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -75,28 +75,32 @@ func NewInstaller(cmd *cobra.Command, name string) (installable, error) {
|
||||||
version = "latest"
|
version = "latest"
|
||||||
}
|
}
|
||||||
f := &fetcher{name}
|
f := &fetcher{name}
|
||||||
version, err := f.checkReleasedVersions(cmd, version)
|
|
||||||
|
version, err := f.checkReleasedVersions(cmd, version, offlineInstall)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("version: %w", err)
|
return nil, fmt.Errorf("version: %w", err)
|
||||||
}
|
}
|
||||||
prj, err := f.loadRemoteProjectDefinition(cmd, version)
|
|
||||||
|
prj, err := f.loadRemoteProjectDefinition(cmd, version, offlineInstall)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("remote: %w", err)
|
return nil, fmt.Errorf("remote: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return &installer{
|
return &installer{
|
||||||
Project: prj,
|
Project: prj,
|
||||||
version: version,
|
version: version,
|
||||||
cmd: cmd,
|
cmd: cmd,
|
||||||
|
offlineInstall: offlineInstall,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewUpgrader(cmd *cobra.Command, name string) (*installer, error) {
|
func NewUpgrader(cmd *cobra.Command, name string) (*installer, error) {
|
||||||
f := &fetcher{name}
|
f := &fetcher{name}
|
||||||
version, err := f.checkReleasedVersions(cmd, "latest")
|
version, err := f.checkReleasedVersions(cmd, "latest", false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("version: %w", err)
|
return nil, fmt.Errorf("version: %w", err)
|
||||||
}
|
}
|
||||||
prj, err := f.loadRemoteProjectDefinition(cmd, version)
|
prj, err := f.loadRemoteProjectDefinition(cmd, version, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("remote: %w", err)
|
return nil, fmt.Errorf("remote: %w", err)
|
||||||
}
|
}
|
||||||
|
@ -115,7 +119,7 @@ type fetcher struct {
|
||||||
name string
|
name string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (string, error) {
|
func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string, offlineInstall bool) (string, error) {
|
||||||
ctx := cmd.Context()
|
ctx := cmd.Context()
|
||||||
cacheDir, err := PathInLabs(ctx, f.name, "cache")
|
cacheDir, err := PathInLabs(ctx, f.name, "cache")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -123,7 +127,8 @@ func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (str
|
||||||
}
|
}
|
||||||
// `databricks labs install X` doesn't know which exact version to fetch, so first
|
// `databricks labs install X` doesn't know which exact version to fetch, so first
|
||||||
// we fetch all versions and then pick the latest one dynamically.
|
// we fetch all versions and then pick the latest one dynamically.
|
||||||
versions, err := github.NewReleaseCache("databrickslabs", f.name, cacheDir).Load(ctx)
|
var versions github.Versions
|
||||||
|
versions, err = github.NewReleaseCache("databrickslabs", f.name, cacheDir, offlineInstall).Load(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("versions: %w", err)
|
return "", fmt.Errorf("versions: %w", err)
|
||||||
}
|
}
|
||||||
|
@ -140,11 +145,23 @@ func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (str
|
||||||
return version, nil
|
return version, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *fetcher) loadRemoteProjectDefinition(cmd *cobra.Command, version string) (*Project, error) {
|
func (i *fetcher) loadRemoteProjectDefinition(cmd *cobra.Command, version string, offlineInstall bool) (*Project, error) {
|
||||||
ctx := cmd.Context()
|
ctx := cmd.Context()
|
||||||
raw, err := github.ReadFileFromRef(ctx, "databrickslabs", i.name, version, "labs.yml")
|
var raw []byte
|
||||||
if err != nil {
|
var err error
|
||||||
return nil, fmt.Errorf("read labs.yml from GitHub: %w", err)
|
if !offlineInstall {
|
||||||
|
raw, err = github.ReadFileFromRef(ctx, "databrickslabs", i.name, version, "labs.yml")
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("read labs.yml from GitHub: %w", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
libDir, _ := PathInLabs(ctx, i.name, "lib")
|
||||||
|
fileName := filepath.Join(libDir, "labs.yml")
|
||||||
|
raw, err = os.ReadFile(fileName)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("read labs.yml from local path %s: %w", libDir, err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return readFromBytes(ctx, raw)
|
return readFromBytes(ctx, raw)
|
||||||
}
|
}
|
||||||
|
|
|
@ -76,7 +76,8 @@ type installer struct {
|
||||||
// command instance is used for:
|
// command instance is used for:
|
||||||
// - auth profile flag override
|
// - auth profile flag override
|
||||||
// - standard input, output, and error streams
|
// - standard input, output, and error streams
|
||||||
cmd *cobra.Command
|
cmd *cobra.Command
|
||||||
|
offlineInstall bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *installer) Install(ctx context.Context) error {
|
func (i *installer) Install(ctx context.Context) error {
|
||||||
|
@ -101,9 +102,15 @@ func (i *installer) Install(ctx context.Context) error {
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
return fmt.Errorf("login: %w", err)
|
return fmt.Errorf("login: %w", err)
|
||||||
}
|
}
|
||||||
err = i.downloadLibrary(ctx)
|
if !i.offlineInstall {
|
||||||
if err != nil {
|
err = i.downloadLibrary(ctx)
|
||||||
return fmt.Errorf("lib: %w", err)
|
if err != nil {
|
||||||
|
return fmt.Errorf("lib: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err := os.Stat(i.LibDir()); os.IsNotExist(err) {
|
||||||
|
return fmt.Errorf("no local installation found: %w", err)
|
||||||
}
|
}
|
||||||
err = i.setupPythonVirtualEnvironment(ctx, w)
|
err = i.setupPythonVirtualEnvironment(ctx, w)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -241,6 +241,45 @@ func TestInstallerWorksForReleases(t *testing.T) {
|
||||||
r.RunAndExpectOutput("setting up important infrastructure")
|
r.RunAndExpectOutput("setting up important infrastructure")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestOfflineInstallerWorksForReleases(t *testing.T) {
|
||||||
|
// This command is useful on systems with restricted internet access. The user should follow these steps:
|
||||||
|
// install a labs project on a machine which has internet
|
||||||
|
// zip and copy the file to the intended machine and
|
||||||
|
// run databricks labs install --offline=true
|
||||||
|
// it will look for the code in the same install directory and if present, install from there.
|
||||||
|
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.URL.Path == "/api/2.1/clusters/get" {
|
||||||
|
respondWithJSON(t, w, &compute.ClusterDetails{
|
||||||
|
State: compute.StateRunning,
|
||||||
|
})
|
||||||
|
return
|
||||||
|
}
|
||||||
|
t.Logf("Requested: %s", r.URL.Path)
|
||||||
|
t.FailNow()
|
||||||
|
}))
|
||||||
|
defer server.Close()
|
||||||
|
|
||||||
|
ctx := installerContext(t, server)
|
||||||
|
newHome := copyTestdata(t, "testdata/installed-in-home")
|
||||||
|
ctx = env.WithUserHomeDir(ctx, newHome)
|
||||||
|
|
||||||
|
ctx, stub := process.WithStub(ctx)
|
||||||
|
stub.WithStdoutFor(`python[\S]+ --version`, "Python 3.10.5")
|
||||||
|
// on Unix, we call `python3`, but on Windows it is `python.exe`
|
||||||
|
stub.WithStderrFor(`python[\S]+ -m venv .*/.databricks/labs/blueprint/state/venv`, "[mock venv create]")
|
||||||
|
stub.WithStderrFor(`python[\S]+ -m pip install --upgrade --upgrade-strategy eager .`, "[mock pip install]")
|
||||||
|
stub.WithStdoutFor(`python[\S]+ install.py`, "setting up important infrastructure")
|
||||||
|
|
||||||
|
// simulate the case of GitHub Actions
|
||||||
|
ctx = env.Set(ctx, "DATABRICKS_HOST", server.URL)
|
||||||
|
ctx = env.Set(ctx, "DATABRICKS_TOKEN", "...")
|
||||||
|
ctx = env.Set(ctx, "DATABRICKS_CLUSTER_ID", "installer-cluster")
|
||||||
|
ctx = env.Set(ctx, "DATABRICKS_WAREHOUSE_ID", "installer-warehouse")
|
||||||
|
|
||||||
|
r := testcli.NewRunner(t, ctx, "labs", "install", "blueprint", "--offline=true", "--debug")
|
||||||
|
r.RunAndExpectOutput("setting up important infrastructure")
|
||||||
|
}
|
||||||
|
|
||||||
func TestInstallerWorksForDevelopment(t *testing.T) {
|
func TestInstallerWorksForDevelopment(t *testing.T) {
|
||||||
defer func() {
|
defer func() {
|
||||||
if !t.Failed() {
|
if !t.Failed() {
|
||||||
|
|
|
@ -307,7 +307,7 @@ func (p *Project) checkUpdates(cmd *cobra.Command) error {
|
||||||
// might not be installed yet
|
// might not be installed yet
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
r := github.NewReleaseCache("databrickslabs", p.Name, p.CacheDir())
|
r := github.NewReleaseCache("databrickslabs", p.Name, p.CacheDir(), false)
|
||||||
versions, err := r.Load(ctx)
|
versions, err := r.Load(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
|
Loading…
Reference in New Issue