mirror of https://github.com/databricks/cli.git
Enable offline install of labs projects (#2049)
## Changes <!-- Summary of your changes that are easy to understand --> This PR makes changes to the labs code base to allow for offline installation of labs projects (like UCX). By passing the flag --offline=true, the code will skip checking for project versions and downloading code from GitHub, and will instead look in the local installation folder. This command is useful on systems with restricted internet access. The user should follow a set-up as follows: - install a labs project on a machine which has internet - zip and copy the installation to the intended machine and - run databricks labs install <project name> --offline=true. It will look for the code in the same install directory and, if present, load it from there. Closes #1646 related to https://github.com/databrickslabs/ucx/issues/3418 ## Tests <!-- How is this tested? --> Added unit test case and tested. NO_CHANGELOG=true --------- Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com> Co-authored-by: Lennart Kats (databricks) <lennart.kats@databricks.com> Co-authored-by: Denis Bilenko <denis.bilenko@databricks.com> Co-authored-by: Julia Crawford (Databricks) <julia.crawford@databricks.com> Co-authored-by: Ilya Kuznetsov <ilya.kuznetsov@databricks.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Andrew Nester <andrew.nester@databricks.com> Co-authored-by: Anton Nekipelov <226657+anton-107@users.noreply.github.com> Co-authored-by: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com>
This commit is contained in:
parent
b5a7023ef1
commit
fa0a734b3c
|
@ -13,12 +13,13 @@ const cacheTTL = 1 * time.Hour
|
|||
|
||||
// NewReleaseCache creates a release cache for a repository in the GitHub org.
|
||||
// Caller has to provide different cache directories for different repositories.
|
||||
func NewReleaseCache(org, repo, cacheDir string) *ReleaseCache {
|
||||
func NewReleaseCache(org, repo, cacheDir string, offlineInstall bool) *ReleaseCache {
|
||||
pattern := fmt.Sprintf("%s-%s-releases", org, repo)
|
||||
return &ReleaseCache{
|
||||
cache: localcache.NewLocalCache[Versions](cacheDir, pattern, cacheTTL),
|
||||
Org: org,
|
||||
Repo: repo,
|
||||
Offline: offlineInstall,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -26,13 +27,18 @@ type ReleaseCache struct {
|
|||
cache localcache.LocalCache[Versions]
|
||||
Org string
|
||||
Repo string
|
||||
Offline bool
|
||||
}
|
||||
|
||||
func (r *ReleaseCache) Load(ctx context.Context) (Versions, error) {
|
||||
if !r.Offline {
|
||||
return r.cache.Load(ctx, func() (Versions, error) {
|
||||
return getVersions(ctx, r.Org, r.Repo)
|
||||
})
|
||||
}
|
||||
cached, err := r.cache.LoadCache()
|
||||
return cached.Data, err
|
||||
}
|
||||
|
||||
// getVersions is considered to be a private API, as we want the usage go through a cache
|
||||
func getVersions(ctx context.Context, org, repo string) (Versions, error) {
|
||||
|
|
|
@ -26,7 +26,7 @@ func TestLoadsReleasesForCLI(t *testing.T) {
|
|||
ctx := context.Background()
|
||||
ctx = WithApiOverride(ctx, server.URL)
|
||||
|
||||
r := NewReleaseCache("databricks", "cli", t.TempDir())
|
||||
r := NewReleaseCache("databricks", "cli", t.TempDir(), false)
|
||||
all, err := r.Load(ctx)
|
||||
assert.NoError(t, err)
|
||||
assert.Len(t, all, 2)
|
||||
|
|
|
@ -7,16 +7,20 @@ import (
|
|||
)
|
||||
|
||||
func newInstallCommand() *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "install NAME",
|
||||
Args: root.ExactArgs(1),
|
||||
Short: "Installs project",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
inst, err := project.NewInstaller(cmd, args[0])
|
||||
cmd := &cobra.Command{}
|
||||
var offlineInstall bool
|
||||
|
||||
cmd.Flags().BoolVar(&offlineInstall, "offline", offlineInstall, `If installing in offline mode, set this flag to true.`)
|
||||
|
||||
cmd.Use = "install NAME"
|
||||
cmd.Args = root.ExactArgs(1)
|
||||
cmd.Short = "Installs project"
|
||||
cmd.RunE = func(cmd *cobra.Command, args []string) error {
|
||||
inst, err := project.NewInstaller(cmd, args[0], offlineInstall)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return inst.Install(cmd.Context())
|
||||
},
|
||||
}
|
||||
return cmd
|
||||
}
|
||||
|
|
|
@ -35,7 +35,7 @@ type LocalCache[T any] struct {
|
|||
}
|
||||
|
||||
func (r *LocalCache[T]) Load(ctx context.Context, refresh func() (T, error)) (T, error) {
|
||||
cached, err := r.loadCache()
|
||||
cached, err := r.LoadCache()
|
||||
if errors.Is(err, fs.ErrNotExist) {
|
||||
return r.refreshCache(ctx, refresh, r.zero)
|
||||
} else if err != nil {
|
||||
|
@ -96,7 +96,7 @@ func (r *LocalCache[T]) FileName() string {
|
|||
return filepath.Join(r.dir, r.name+".json")
|
||||
}
|
||||
|
||||
func (r *LocalCache[T]) loadCache() (*cached[T], error) {
|
||||
func (r *LocalCache[T]) LoadCache() (*cached[T], error) {
|
||||
jsonFile := r.FileName()
|
||||
raw, err := os.ReadFile(r.FileName())
|
||||
if err != nil {
|
||||
|
|
|
@ -54,7 +54,7 @@ func (d *devInstallation) Install(ctx context.Context) error {
|
|||
return d.Installer.runHook(d.Command)
|
||||
}
|
||||
|
||||
func NewInstaller(cmd *cobra.Command, name string) (installable, error) {
|
||||
func NewInstaller(cmd *cobra.Command, name string, offlineInstall bool) (installable, error) {
|
||||
if name == "." {
|
||||
wd, err := os.Getwd()
|
||||
if err != nil {
|
||||
|
@ -75,28 +75,32 @@ func NewInstaller(cmd *cobra.Command, name string) (installable, error) {
|
|||
version = "latest"
|
||||
}
|
||||
f := &fetcher{name}
|
||||
version, err := f.checkReleasedVersions(cmd, version)
|
||||
|
||||
version, err := f.checkReleasedVersions(cmd, version, offlineInstall)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("version: %w", err)
|
||||
}
|
||||
prj, err := f.loadRemoteProjectDefinition(cmd, version)
|
||||
|
||||
prj, err := f.loadRemoteProjectDefinition(cmd, version, offlineInstall)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("remote: %w", err)
|
||||
}
|
||||
|
||||
return &installer{
|
||||
Project: prj,
|
||||
version: version,
|
||||
cmd: cmd,
|
||||
offlineInstall: offlineInstall,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func NewUpgrader(cmd *cobra.Command, name string) (*installer, error) {
|
||||
f := &fetcher{name}
|
||||
version, err := f.checkReleasedVersions(cmd, "latest")
|
||||
version, err := f.checkReleasedVersions(cmd, "latest", false)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("version: %w", err)
|
||||
}
|
||||
prj, err := f.loadRemoteProjectDefinition(cmd, version)
|
||||
prj, err := f.loadRemoteProjectDefinition(cmd, version, false)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("remote: %w", err)
|
||||
}
|
||||
|
@ -115,7 +119,7 @@ type fetcher struct {
|
|||
name string
|
||||
}
|
||||
|
||||
func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (string, error) {
|
||||
func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string, offlineInstall bool) (string, error) {
|
||||
ctx := cmd.Context()
|
||||
cacheDir, err := PathInLabs(ctx, f.name, "cache")
|
||||
if err != nil {
|
||||
|
@ -123,7 +127,8 @@ func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (str
|
|||
}
|
||||
// `databricks labs install X` doesn't know which exact version to fetch, so first
|
||||
// we fetch all versions and then pick the latest one dynamically.
|
||||
versions, err := github.NewReleaseCache("databrickslabs", f.name, cacheDir).Load(ctx)
|
||||
var versions github.Versions
|
||||
versions, err = github.NewReleaseCache("databrickslabs", f.name, cacheDir, offlineInstall).Load(ctx)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("versions: %w", err)
|
||||
}
|
||||
|
@ -140,11 +145,23 @@ func (f *fetcher) checkReleasedVersions(cmd *cobra.Command, version string) (str
|
|||
return version, nil
|
||||
}
|
||||
|
||||
func (i *fetcher) loadRemoteProjectDefinition(cmd *cobra.Command, version string) (*Project, error) {
|
||||
func (i *fetcher) loadRemoteProjectDefinition(cmd *cobra.Command, version string, offlineInstall bool) (*Project, error) {
|
||||
ctx := cmd.Context()
|
||||
raw, err := github.ReadFileFromRef(ctx, "databrickslabs", i.name, version, "labs.yml")
|
||||
var raw []byte
|
||||
var err error
|
||||
if !offlineInstall {
|
||||
raw, err = github.ReadFileFromRef(ctx, "databrickslabs", i.name, version, "labs.yml")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read labs.yml from GitHub: %w", err)
|
||||
}
|
||||
} else {
|
||||
libDir, _ := PathInLabs(ctx, i.name, "lib")
|
||||
fileName := filepath.Join(libDir, "labs.yml")
|
||||
raw, err = os.ReadFile(fileName)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("read labs.yml from local path %s: %w", libDir, err)
|
||||
}
|
||||
}
|
||||
|
||||
return readFromBytes(ctx, raw)
|
||||
}
|
||||
|
|
|
@ -77,6 +77,7 @@ type installer struct {
|
|||
// - auth profile flag override
|
||||
// - standard input, output, and error streams
|
||||
cmd *cobra.Command
|
||||
offlineInstall bool
|
||||
}
|
||||
|
||||
func (i *installer) Install(ctx context.Context) error {
|
||||
|
@ -101,10 +102,16 @@ func (i *installer) Install(ctx context.Context) error {
|
|||
} else if err != nil {
|
||||
return fmt.Errorf("login: %w", err)
|
||||
}
|
||||
if !i.offlineInstall {
|
||||
err = i.downloadLibrary(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("lib: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
if _, err := os.Stat(i.LibDir()); os.IsNotExist(err) {
|
||||
return fmt.Errorf("no local installation found: %w", err)
|
||||
}
|
||||
err = i.setupPythonVirtualEnvironment(ctx, w)
|
||||
if err != nil {
|
||||
return fmt.Errorf("python: %w", err)
|
||||
|
|
|
@ -241,6 +241,45 @@ func TestInstallerWorksForReleases(t *testing.T) {
|
|||
r.RunAndExpectOutput("setting up important infrastructure")
|
||||
}
|
||||
|
||||
func TestOfflineInstallerWorksForReleases(t *testing.T) {
|
||||
// This cmd is useful in systems where there is internet restriction, the user should follow a set-up as follows:
|
||||
// install a labs project on a machine which has internet
|
||||
// zip and copy the file to the intended machine and
|
||||
// run databricks labs install --offline=true
|
||||
// it will look for the code in the same install directory and if present, install from there.
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
if r.URL.Path == "/api/2.1/clusters/get" {
|
||||
respondWithJSON(t, w, &compute.ClusterDetails{
|
||||
State: compute.StateRunning,
|
||||
})
|
||||
return
|
||||
}
|
||||
t.Logf("Requested: %s", r.URL.Path)
|
||||
t.FailNow()
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
ctx := installerContext(t, server)
|
||||
newHome := copyTestdata(t, "testdata/installed-in-home")
|
||||
ctx = env.WithUserHomeDir(ctx, newHome)
|
||||
|
||||
ctx, stub := process.WithStub(ctx)
|
||||
stub.WithStdoutFor(`python[\S]+ --version`, "Python 3.10.5")
|
||||
// on Unix, we call `python3`, but on Windows it is `python.exe`
|
||||
stub.WithStderrFor(`python[\S]+ -m venv .*/.databricks/labs/blueprint/state/venv`, "[mock venv create]")
|
||||
stub.WithStderrFor(`python[\S]+ -m pip install --upgrade --upgrade-strategy eager .`, "[mock pip install]")
|
||||
stub.WithStdoutFor(`python[\S]+ install.py`, "setting up important infrastructure")
|
||||
|
||||
// simulate the case of GitHub Actions
|
||||
ctx = env.Set(ctx, "DATABRICKS_HOST", server.URL)
|
||||
ctx = env.Set(ctx, "DATABRICKS_TOKEN", "...")
|
||||
ctx = env.Set(ctx, "DATABRICKS_CLUSTER_ID", "installer-cluster")
|
||||
ctx = env.Set(ctx, "DATABRICKS_WAREHOUSE_ID", "installer-warehouse")
|
||||
|
||||
r := testcli.NewRunner(t, ctx, "labs", "install", "blueprint", "--offline=true", "--debug")
|
||||
r.RunAndExpectOutput("setting up important infrastructure")
|
||||
}
|
||||
|
||||
func TestInstallerWorksForDevelopment(t *testing.T) {
|
||||
defer func() {
|
||||
if !t.Failed() {
|
||||
|
|
|
@ -307,7 +307,7 @@ func (p *Project) checkUpdates(cmd *cobra.Command) error {
|
|||
// might not be installed yet
|
||||
return nil
|
||||
}
|
||||
r := github.NewReleaseCache("databrickslabs", p.Name, p.CacheDir())
|
||||
r := github.NewReleaseCache("databrickslabs", p.Name, p.CacheDir(), false)
|
||||
versions, err := r.Load(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
Loading…
Reference in New Issue