From b81008e2f64d3ee9a29338f4e42032cb56630e86 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Tue, 5 Nov 2024 20:59:27 +0530 Subject: [PATCH] Clean host URL in the `auth login` command (#1879) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Changes The host URL for databricks workspaces includes the workspaceId by default as a positional arg. Eg: https://e2-dogfood.staging.cloud.databricks.com/?o=1234 Thus a user can't simply copy paste the URL today to the auth login command. They'll see a runtime error: ``` ➜ cli git:(main) ✗ databricks auth login --host https://e2-dogfood.staging.cloud.databricks.com/\?o\=xxx --profile new-dg Error: oidc: fetch .well-known: failed to unmarshal response body: invalid character '<' looking for beginning of value. This is likely a bug in the Databricks SDK for Go or the underlying REST API. Please report this issue with the following debugging information to the SDK issue tracker at https://github.com/databricks/databricks-sdk-go/issues. Request log: GET /login.html ... ``` ## Tests Unit tests and manually. Now auth login works even when the workspace_id is included in the URL. --- libs/auth/oauth.go | 24 ++++++++++++++++++++++++ libs/auth/oauth_test.go | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/libs/auth/oauth.go b/libs/auth/oauth.go index 7c1cb957..026c4546 100644 --- a/libs/auth/oauth.go +++ b/libs/auth/oauth.go @@ -9,6 +9,7 @@ import ( "errors" "fmt" "net" + "net/url" "strings" "time" @@ -143,6 +144,26 @@ func (a *PersistentAuth) Challenge(ctx context.Context) error { return nil } +// This function cleans up the host URL by only retaining the scheme and the host. +// This function thus removes any path, query arguments, or fragments from the URL. +func (a *PersistentAuth) cleanHost() { + parsedHost, err := url.Parse(a.Host) + if err != nil { + return + } + // when either host or scheme is empty, we don't want to clean it. This is because + // the Go url library parses a raw "abc" string as the path of a URL and cleaning + // it will return thus return an empty string. + if parsedHost.Host == "" || parsedHost.Scheme == "" { + return + } + host := url.URL{ + Scheme: parsedHost.Scheme, + Host: parsedHost.Host, + } + a.Host = host.String() +} + func (a *PersistentAuth) init(ctx context.Context) error { if a.Host == "" && a.AccountID == "" { return ErrFetchCredentials @@ -156,6 +177,9 @@ func (a *PersistentAuth) init(ctx context.Context) error { if a.browser == nil { a.browser = browser.OpenURL } + + a.cleanHost() + // try acquire listener, which we also use as a machine-local // exclusive lock to prevent token cache corruption in the scope // of developer machine, where this command runs. diff --git a/libs/auth/oauth_test.go b/libs/auth/oauth_test.go index ea6a8061..fdf0d04b 100644 --- a/libs/auth/oauth_test.go +++ b/libs/auth/oauth_test.go @@ -228,3 +228,37 @@ func TestChallengeFailed(t *testing.T) { assert.EqualError(t, err, "authorize: access_denied: Policy evaluation failed for this request") }) } + +func TestPersistentAuthCleanHost(t *testing.T) { + for _, tcases := range []struct { + in string + out string + }{ + {"https://example.com", "https://example.com"}, + {"https://example.com/", "https://example.com"}, + {"https://example.com/path", "https://example.com"}, + {"https://example.com/path/subpath", "https://example.com"}, + {"https://example.com/path?query=1", "https://example.com"}, + {"https://example.com/path?query=1&other=2", "https://example.com"}, + {"https://example.com/path#fragment", "https://example.com"}, + {"https://example.com/path?query=1#fragment", "https://example.com"}, + {"https://example.com/path?query=1&other=2#fragment", "https://example.com"}, + {"https://example.com/path/subpath?query=1", "https://example.com"}, + {"https://example.com/path/subpath?query=1&other=2", "https://example.com"}, + {"https://example.com/path/subpath#fragment", "https://example.com"}, + {"https://example.com/path/subpath?query=1#fragment", "https://example.com"}, + {"https://example.com/path/subpath?query=1&other=2#fragment", "https://example.com"}, + {"https://example.com/path?query=1%20value&other=2%20value", "https://example.com"}, + {"http://example.com/path/subpath?query=1%20value&other=2%20value", "http://example.com"}, + + // URLs without scheme should be left as is + {"abc", "abc"}, + {"abc.com/def", "abc.com/def"}, + } { + p := &PersistentAuth{ + Host: tcases.in, + } + p.cleanHost() + assert.Equal(t, tcases.out, p.Host) + } +}