2023-01-06 15:15:57 +00:00
|
|
|
package auth
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
2023-08-15 13:50:40 +00:00
|
|
|
"crypto/rand"
|
2023-01-06 15:15:57 +00:00
|
|
|
"crypto/sha256"
|
|
|
|
_ "embed"
|
|
|
|
"encoding/base64"
|
|
|
|
"errors"
|
|
|
|
"fmt"
|
|
|
|
"net"
|
2024-11-05 15:29:27 +00:00
|
|
|
"net/url"
|
2023-01-06 15:15:57 +00:00
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
2023-05-16 16:35:39 +00:00
|
|
|
"github.com/databricks/cli/libs/auth/cache"
|
2024-03-11 22:24:23 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go/httpclient"
|
2023-01-06 15:15:57 +00:00
|
|
|
"github.com/databricks/databricks-sdk-go/retries"
|
|
|
|
"github.com/pkg/browser"
|
|
|
|
"golang.org/x/oauth2"
|
|
|
|
"golang.org/x/oauth2/authhandler"
|
|
|
|
)
|
|
|
|
|
Improve token refresh flow (#1434)
## Changes
Currently, there are a number of issues with the non-happy-path flows
for token refresh in the CLI.
If the token refresh fails, the raw error message is presented to the
user, as seen below. This message is very difficult for users to
interpret and doesn't give any clear direction on how to resolve this
issue.
```
Error: token refresh: Post "https://adb-<WSID>.azuredatabricks.net/oidc/v1/token": http 400: {"error":"invalid_request","error_description":"Refresh token is invalid"}
```
When logging in again, I've noticed that the timeout for logging in is
very short, only 45 seconds. If a user is using a password manager and
needs to login to that first, or needs to do MFA, 45 seconds may not be
enough time. Additionally, when logging in
to an account-level profile, it is quite frustrating for
users to need to re-enter account ID information when that information
is already stored in the user's `.databrickscfg` file.
This PR tackles these two issues. First, the presentation of error
messages from `databricks auth token` is improved substantially by
converting the `error` into a human-readable message. When the refresh
token is invalid, it will present a command for the user to run to
reauthenticate. If the token fetching failed for some other reason, that
reason will be presented in a nice way, providing front-line debugging
steps and ultimately redirecting users to file a ticket at this repo if
they can't resolve the issue themselves. After this PR, the new error
message is:
```
Error: a new access token could not be retrieved because the refresh token is invalid. To reauthenticate, run `.databricks/databricks auth login --host https://adb-<WSID>.azuredatabricks.net`
```
To improve the login flow, this PR modifies `databricks auth login` to
auto-complete the account ID from the profile when present.
Additionally, it increases the login timeout from 45 seconds to 1 hour
to give the user sufficient time to login as needed.
To test this change, I needed to refactor some components of the CLI
around profile management, the token cache, and the API client used to
fetch OAuth tokens. These are now settable in the context, and a
demonstration of how they can be set and used is found in
`auth_test.go`.
Separately, this also demonstrates a sort-of integration test of the CLI
by executing the Cobra command for `databricks auth token` from tests,
which may be useful for testing other end-to-end functionality in the
CLI. In particular, I believe this is necessary in order to set flag
values (like the `--profile` flag in this case) for use in testing.
## Tests
Unit tests cover the unhappy and happy paths using the mocked API
client, token cache, and profiler.
Manually tested
---------
Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
2024-05-16 10:22:09 +00:00
|
|
|
// apiClientForOauth exists only for its address, which is used as the context
// key under which WithApiClientForOAuth stores the OAuth API client and
// GetApiClientForOAuth looks it up.
var apiClientForOauth int
|
|
|
|
|
|
|
|
func WithApiClientForOAuth(ctx context.Context, c *httpclient.ApiClient) context.Context {
|
|
|
|
return context.WithValue(ctx, &apiClientForOauth, c)
|
|
|
|
}
|
|
|
|
|
|
|
|
func GetApiClientForOAuth(ctx context.Context) *httpclient.ApiClient {
|
|
|
|
c, ok := ctx.Value(&apiClientForOauth).(*httpclient.ApiClient)
|
|
|
|
if !ok {
|
|
|
|
return httpclient.NewApiClient(httpclient.ClientConfig{})
|
|
|
|
}
|
|
|
|
return c
|
|
|
|
}
|
|
|
|
|
2023-01-06 15:15:57 +00:00
|
|
|
const (
	// These values are predefined by Databricks as a public client
	// and are specific to this application only. Using these values
	// for other applications is not allowed.
	appClientID     = "databricks-cli"
	appRedirectAddr = "localhost:8020"

	// listenerTimeout is the maximum amount of time to spend acquiring the
	// listener on appRedirectAddr.
	listenerTimeout = 45 * time.Second
)
|
|
|
|
|
|
|
|
var (
	// Databricks SDK API: `databricks OAuth is not` will be checked for presence.
	// NOTE(review): presumably the check is a text match on the error message,
	// so keep the wording of the messages below stable - confirm against the SDK.

	// ErrOAuthNotSupported reports that the host does not support Databricks OAuth.
	ErrOAuthNotSupported = errors.New("databricks OAuth is not supported for this host")
	// ErrNotConfigured reports that Databricks OAuth is not configured for the host.
	ErrNotConfigured = errors.New("databricks OAuth is not configured for this host")
	// ErrFetchCredentials is returned when neither Host nor AccountID is set
	// on a PersistentAuth.
	ErrFetchCredentials = errors.New("cannot fetch credentials")
)
|
|
|
|
|
|
|
|
// PersistentAuth runs the OAuth flow for a Databricks host (and, optionally,
// an account) and keeps the resulting tokens in a token cache.
type PersistentAuth struct {
	// Host is the Databricks host. It is normalized in place by cleanHost
	// and key.
	Host string
	// AccountID, when non-empty, selects the account-level OIDC endpoints
	// and becomes part of the token cache key.
	AccountID string

	// http is the API client used for OIDC discovery and handed to the
	// OAuth2 library; init fills it from the context when nil.
	http *httpclient.ApiClient
	// cache stores tokens by key; init fills it from the context when nil.
	cache cache.TokenCache
	// ln is the listener on appRedirectAddr acquired by init and released
	// by Close.
	ln net.Listener
	// browser opens a URL; init defaults it to browser.OpenURL when nil.
	browser func(string) error
}
|
|
|
|
|
Improve token refresh flow (#1434)
## Changes
Currently, there are a number of issues with the non-happy-path flows
for token refresh in the CLI.
If the token refresh fails, the raw error message is presented to the
user, as seen below. This message is very difficult for users to
interpret and doesn't give any clear direction on how to resolve this
issue.
```
Error: token refresh: Post "https://adb-<WSID>.azuredatabricks.net/oidc/v1/token": http 400: {"error":"invalid_request","error_description":"Refresh token is invalid"}
```
When logging in again, I've noticed that the timeout for logging in is
very short, only 45 seconds. If a user is using a password manager and
needs to login to that first, or needs to do MFA, 45 seconds may not be
enough time. Additionally, when logging in
to an account-level profile, it is quite frustrating for
users to need to re-enter account ID information when that information
is already stored in the user's `.databrickscfg` file.
This PR tackles these two issues. First, the presentation of error
messages from `databricks auth token` is improved substantially by
converting the `error` into a human-readable message. When the refresh
token is invalid, it will present a command for the user to run to
reauthenticate. If the token fetching failed for some other reason, that
reason will be presented in a nice way, providing front-line debugging
steps and ultimately redirecting users to file a ticket at this repo if
they can't resolve the issue themselves. After this PR, the new error
message is:
```
Error: a new access token could not be retrieved because the refresh token is invalid. To reauthenticate, run `.databricks/databricks auth login --host https://adb-<WSID>.azuredatabricks.net`
```
To improve the login flow, this PR modifies `databricks auth login` to
auto-complete the account ID from the profile when present.
Additionally, it increases the login timeout from 45 seconds to 1 hour
to give the user sufficient time to login as needed.
To test this change, I needed to refactor some components of the CLI
around profile management, the token cache, and the API client used to
fetch OAuth tokens. These are now settable in the context, and a
demonstration of how they can be set and used is found in
`auth_test.go`.
Separately, this also demonstrates a sort-of integration test of the CLI
by executing the Cobra command for `databricks auth token` from tests,
which may be useful for testing other end-to-end functionality in the
CLI. In particular, I believe this is necessary in order to set flag
values (like the `--profile` flag in this case) for use in testing.
## Tests
Unit tests cover the unhappy and happy paths using the mocked API
client, token cache, and profiler.
Manually tested
---------
Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
2024-05-16 10:22:09 +00:00
|
|
|
// SetApiClient sets the API client used by this PersistentAuth. A client set
// here takes precedence over the one init would otherwise take from the
// context, because init only fills the field when it is nil.
func (a *PersistentAuth) SetApiClient(h *httpclient.ApiClient) {
	a.http = h
}
|
|
|
|
|
|
|
|
func (a *PersistentAuth) Load(ctx context.Context) (*oauth2.Token, error) {
|
|
|
|
err := a.init(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("init: %w", err)
|
|
|
|
}
|
|
|
|
// lookup token identified by host (and possibly the account id)
|
|
|
|
key := a.key()
|
|
|
|
t, err := a.cache.Lookup(key)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cache: %w", err)
|
|
|
|
}
|
|
|
|
// early return for valid tokens
|
|
|
|
if t.Valid() {
|
|
|
|
// do not print refresh token to end-user
|
|
|
|
t.RefreshToken = ""
|
|
|
|
return t, nil
|
|
|
|
}
|
|
|
|
// OAuth2 config is invoked only for expired tokens to speed up
|
|
|
|
// the happy path in the token retrieval
|
2024-03-11 22:24:23 +00:00
|
|
|
cfg, err := a.oauth2Config(ctx)
|
2023-01-06 15:15:57 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2024-03-11 22:24:23 +00:00
|
|
|
// make OAuth2 library use our client
|
|
|
|
ctx = a.http.InContextForOAuth2(ctx)
|
2023-01-06 15:15:57 +00:00
|
|
|
// eagerly refresh token
|
|
|
|
refreshed, err := cfg.TokenSource(ctx, t).Token()
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("token refresh: %w", err)
|
|
|
|
}
|
|
|
|
err = a.cache.Store(key, refreshed)
|
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("cache refresh: %w", err)
|
|
|
|
}
|
|
|
|
// do not print refresh token to end-user
|
|
|
|
refreshed.RefreshToken = ""
|
|
|
|
return refreshed, nil
|
|
|
|
}
|
|
|
|
|
2023-06-21 10:58:28 +00:00
|
|
|
func (a *PersistentAuth) ProfileName() string {
|
2023-06-02 11:49:39 +00:00
|
|
|
if a.AccountID != "" {
|
|
|
|
return fmt.Sprintf("ACCOUNT-%s", a.AccountID)
|
|
|
|
}
|
|
|
|
host := strings.TrimPrefix(a.Host, "https://")
|
|
|
|
split := strings.Split(host, ".")
|
|
|
|
return split[0]
|
|
|
|
}
|
|
|
|
|
2023-01-06 15:15:57 +00:00
|
|
|
func (a *PersistentAuth) Challenge(ctx context.Context) error {
|
|
|
|
err := a.init(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("init: %w", err)
|
|
|
|
}
|
2024-03-11 22:24:23 +00:00
|
|
|
cfg, err := a.oauth2Config(ctx)
|
2023-01-06 15:15:57 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
cb, err := newCallback(ctx, a)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("callback server: %w", err)
|
|
|
|
}
|
|
|
|
defer cb.Close()
|
|
|
|
state, pkce := a.stateAndPKCE()
|
2024-03-11 22:24:23 +00:00
|
|
|
// make OAuth2 library use our client
|
|
|
|
ctx = a.http.InContextForOAuth2(ctx)
|
2023-01-06 15:15:57 +00:00
|
|
|
ts := authhandler.TokenSourceWithPKCE(ctx, cfg, state, cb.Handler, pkce)
|
|
|
|
t, err := ts.Token()
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("authorize: %w", err)
|
|
|
|
}
|
|
|
|
// cache token identified by host (and possibly the account id)
|
|
|
|
err = a.cache.Store(a.key(), t)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("store: %w", err)
|
|
|
|
}
|
2023-06-21 10:58:28 +00:00
|
|
|
return nil
|
2023-01-06 15:15:57 +00:00
|
|
|
}
|
|
|
|
|
2024-09-01 16:22:18 +00:00
|
|
|
func (a *PersistentAuth) ClearToken(ctx context.Context) error {
|
|
|
|
if a.Host == "" && a.AccountID == "" {
|
|
|
|
return ErrFetchCredentials
|
|
|
|
}
|
|
|
|
if a.cache == nil {
|
|
|
|
a.cache = cache.GetTokenCache(ctx)
|
|
|
|
}
|
|
|
|
// lookup token identified by host (and possibly the account id)
|
|
|
|
key := a.key()
|
2024-09-23 18:21:38 +00:00
|
|
|
return a.cache.Delete(key)
|
2024-09-01 16:22:18 +00:00
|
|
|
}
|
|
|
|
|
2024-11-05 15:29:27 +00:00
|
|
|
// This function cleans up the host URL by only retaining the scheme and the host.
|
|
|
|
// This function thus removes any path, query arguments, or fragments from the URL.
|
|
|
|
func (a *PersistentAuth) cleanHost() {
|
|
|
|
parsedHost, err := url.Parse(a.Host)
|
|
|
|
if err != nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
// when either host or scheme is empty, we don't want to clean it. This is because
|
|
|
|
// the Go url library parses a raw "abc" string as the path of a URL and cleaning
|
|
|
|
// it will return thus return an empty string.
|
|
|
|
if parsedHost.Host == "" || parsedHost.Scheme == "" {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
host := url.URL{
|
|
|
|
Scheme: parsedHost.Scheme,
|
|
|
|
Host: parsedHost.Host,
|
|
|
|
}
|
|
|
|
a.Host = host.String()
|
|
|
|
}
|
|
|
|
|
2023-01-06 15:15:57 +00:00
|
|
|
func (a *PersistentAuth) init(ctx context.Context) error {
|
|
|
|
if a.Host == "" && a.AccountID == "" {
|
|
|
|
return ErrFetchCredentials
|
|
|
|
}
|
|
|
|
if a.http == nil {
|
Improve token refresh flow (#1434)
## Changes
Currently, there are a number of issues with the non-happy-path flows
for token refresh in the CLI.
If the token refresh fails, the raw error message is presented to the
user, as seen below. This message is very difficult for users to
interpret and doesn't give any clear direction on how to resolve this
issue.
```
Error: token refresh: Post "https://adb-<WSID>.azuredatabricks.net/oidc/v1/token": http 400: {"error":"invalid_request","error_description":"Refresh token is invalid"}
```
When logging in again, I've noticed that the timeout for logging in is
very short, only 45 seconds. If a user is using a password manager and
needs to login to that first, or needs to do MFA, 45 seconds may not be
enough time. to an account-level profile, it is quite frustrating for
users to need to re-enter account ID information when that information
is already stored in the user's `.databrickscfg` file.
This PR tackles these two issues. First, the presentation of error
messages from `databricks auth token` is improved substantially by
converting the `error` into a human-readable message. When the refresh
token is invalid, it will present a command for the user to run to
reauthenticate. If the token fetching failed for some other reason, that
reason will be presented in a nice way, providing front-line debugging
steps and ultimately redirecting users to file a ticket at this repo if
they can't resolve the issue themselves. After this PR, the new error
message is:
```
Error: a new access token could not be retrieved because the refresh token is invalid. To reauthenticate, run `.databricks/databricks auth login --host https://adb-<WSID>.azuredatabricks.net`
```
To improve the login flow, this PR modifies `databricks auth login` to
auto-complete the account ID from the profile when present.
Additionally, it increases the login timeout from 45 seconds to 1 hour
to give the user sufficient time to login as needed.
To test this change, I needed to refactor some components of the CLI
around profile management, the token cache, and the API client used to
fetch OAuth tokens. These are now settable in the context, and a
demonstration of how they can be set and used is found in
`auth_test.go`.
Separately, this also demonstrates a sort-of integration test of the CLI
by executing the Cobra command for `databricks auth token` from tests,
which may be useful for testing other end-to-end functionality in the
CLI. In particular, I believe this is necessary in order to set flag
values (like the `--profile` flag in this case) for use in testing.
## Tests
Unit tests cover the unhappy and happy paths using the mocked API
client, token cache, and profiler.
Manually tested
---------
Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
2024-05-16 10:22:09 +00:00
|
|
|
a.http = GetApiClientForOAuth(ctx)
|
2023-01-06 15:15:57 +00:00
|
|
|
}
|
|
|
|
if a.cache == nil {
|
Improve token refresh flow (#1434)
## Changes
Currently, there are a number of issues with the non-happy-path flows
for token refresh in the CLI.
If the token refresh fails, the raw error message is presented to the
user, as seen below. This message is very difficult for users to
interpret and doesn't give any clear direction on how to resolve this
issue.
```
Error: token refresh: Post "https://adb-<WSID>.azuredatabricks.net/oidc/v1/token": http 400: {"error":"invalid_request","error_description":"Refresh token is invalid"}
```
When logging in again, I've noticed that the timeout for logging in is
very short, only 45 seconds. If a user is using a password manager and
needs to login to that first, or needs to do MFA, 45 seconds may not be
enough time. to an account-level profile, it is quite frustrating for
users to need to re-enter account ID information when that information
is already stored in the user's `.databrickscfg` file.
This PR tackles these two issues. First, the presentation of error
messages from `databricks auth token` is improved substantially by
converting the `error` into a human-readable message. When the refresh
token is invalid, it will present a command for the user to run to
reauthenticate. If the token fetching failed for some other reason, that
reason will be presented in a nice way, providing front-line debugging
steps and ultimately redirecting users to file a ticket at this repo if
they can't resolve the issue themselves. After this PR, the new error
message is:
```
Error: a new access token could not be retrieved because the refresh token is invalid. To reauthenticate, run `.databricks/databricks auth login --host https://adb-<WSID>.azuredatabricks.net`
```
To improve the login flow, this PR modifies `databricks auth login` to
auto-complete the account ID from the profile when present.
Additionally, it increases the login timeout from 45 seconds to 1 hour
to give the user sufficient time to login as needed.
To test this change, I needed to refactor some components of the CLI
around profile management, the token cache, and the API client used to
fetch OAuth tokens. These are now settable in the context, and a
demonstration of how they can be set and used is found in
`auth_test.go`.
Separately, this also demonstrates a sort-of integration test of the CLI
by executing the Cobra command for `databricks auth token` from tests,
which may be useful for testing other end-to-end functionality in the
CLI. In particular, I believe this is necessary in order to set flag
values (like the `--profile` flag in this case) for use in testing.
## Tests
Unit tests cover the unhappy and happy paths using the mocked API
client, token cache, and profiler.
Manually tested
---------
Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
2024-05-16 10:22:09 +00:00
|
|
|
a.cache = cache.GetTokenCache(ctx)
|
2023-01-06 15:15:57 +00:00
|
|
|
}
|
|
|
|
if a.browser == nil {
|
|
|
|
a.browser = browser.OpenURL
|
|
|
|
}
|
2024-11-05 15:29:27 +00:00
|
|
|
|
|
|
|
a.cleanHost()
|
|
|
|
|
2023-01-06 15:15:57 +00:00
|
|
|
// try acquire listener, which we also use as a machine-local
|
|
|
|
// exclusive lock to prevent token cache corruption in the scope
|
|
|
|
// of developer machine, where this command runs.
|
Improve token refresh flow (#1434)
## Changes
Currently, there are a number of issues with the non-happy-path flows
for token refresh in the CLI.
If the token refresh fails, the raw error message is presented to the
user, as seen below. This message is very difficult for users to
interpret and doesn't give any clear direction on how to resolve this
issue.
```
Error: token refresh: Post "https://adb-<WSID>.azuredatabricks.net/oidc/v1/token": http 400: {"error":"invalid_request","error_description":"Refresh token is invalid"}
```
When logging in again, I've noticed that the timeout for logging in is
very short, only 45 seconds. If a user is using a password manager and
needs to login to that first, or needs to do MFA, 45 seconds may not be
enough time. to an account-level profile, it is quite frustrating for
users to need to re-enter account ID information when that information
is already stored in the user's `.databrickscfg` file.
This PR tackles these two issues. First, the presentation of error
messages from `databricks auth token` is improved substantially by
converting the `error` into a human-readable message. When the refresh
token is invalid, it will present a command for the user to run to
reauthenticate. If the token fetching failed for some other reason, that
reason will be presented in a nice way, providing front-line debugging
steps and ultimately redirecting users to file a ticket at this repo if
they can't resolve the issue themselves. After this PR, the new error
message is:
```
Error: a new access token could not be retrieved because the refresh token is invalid. To reauthenticate, run `.databricks/databricks auth login --host https://adb-<WSID>.azuredatabricks.net`
```
To improve the login flow, this PR modifies `databricks auth login` to
auto-complete the account ID from the profile when present.
Additionally, it increases the login timeout from 45 seconds to 1 hour
to give the user sufficient time to login as needed.
To test this change, I needed to refactor some components of the CLI
around profile management, the token cache, and the API client used to
fetch OAuth tokens. These are now settable in the context, and a
demonstration of how they can be set and used is found in
`auth_test.go`.
Separately, this also demonstrates a sort-of integration test of the CLI
by executing the Cobra command for `databricks auth token` from tests,
which may be useful for testing other end-to-end functionality in the
CLI. In particular, I believe this is necessary in order to set flag
values (like the `--profile` flag in this case) for use in testing.
## Tests
Unit tests cover the unhappy and happy paths using the mocked API
client, token cache, and profiler.
Manually tested
---------
Co-authored-by: Pieter Noordhuis <pieter.noordhuis@databricks.com>
2024-05-16 10:22:09 +00:00
|
|
|
listener, err := retries.Poll(ctx, listenerTimeout,
|
2023-01-06 15:15:57 +00:00
|
|
|
func() (*net.Listener, *retries.Err) {
|
|
|
|
var lc net.ListenConfig
|
|
|
|
l, err := lc.Listen(ctx, "tcp", appRedirectAddr)
|
|
|
|
if err != nil {
|
|
|
|
return nil, retries.Continue(err)
|
|
|
|
}
|
|
|
|
return &l, nil
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("listener: %w", err)
|
|
|
|
}
|
|
|
|
a.ln = *listener
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (a *PersistentAuth) Close() error {
|
|
|
|
if a.ln == nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return a.ln.Close()
|
|
|
|
}
|
|
|
|
|
2024-03-11 22:24:23 +00:00
|
|
|
func (a *PersistentAuth) oidcEndpoints(ctx context.Context) (*oauthAuthorizationServer, error) {
|
2023-01-06 15:15:57 +00:00
|
|
|
prefix := a.key()
|
|
|
|
if a.AccountID != "" {
|
|
|
|
return &oauthAuthorizationServer{
|
|
|
|
AuthorizationEndpoint: fmt.Sprintf("%s/v1/authorize", prefix),
|
|
|
|
TokenEndpoint: fmt.Sprintf("%s/v1/token", prefix),
|
|
|
|
}, nil
|
|
|
|
}
|
2024-03-11 22:24:23 +00:00
|
|
|
var oauthEndpoints oauthAuthorizationServer
|
2023-01-06 15:15:57 +00:00
|
|
|
oidc := fmt.Sprintf("%s/oidc/.well-known/oauth-authorization-server", prefix)
|
2024-03-11 22:24:23 +00:00
|
|
|
err := a.http.Do(ctx, "GET", oidc, httpclient.WithResponseUnmarshal(&oauthEndpoints))
|
2023-01-06 15:15:57 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("fetch .well-known: %w", err)
|
|
|
|
}
|
2024-03-11 22:24:23 +00:00
|
|
|
var httpErr *httpclient.HttpError
|
|
|
|
if errors.As(err, &httpErr) && httpErr.StatusCode != 200 {
|
2023-01-06 15:15:57 +00:00
|
|
|
return nil, ErrOAuthNotSupported
|
|
|
|
}
|
|
|
|
return &oauthEndpoints, nil
|
|
|
|
}
|
|
|
|
|
2024-03-11 22:24:23 +00:00
|
|
|
func (a *PersistentAuth) oauth2Config(ctx context.Context) (*oauth2.Config, error) {
|
2023-01-06 15:15:57 +00:00
|
|
|
// in this iteration of CLI, we're using all scopes by default,
|
|
|
|
// because tools like CLI and Terraform do use all apis. This
|
|
|
|
// decision may be reconsidered later, once we have a proper
|
|
|
|
// taxonomy of all scopes ready and implemented.
|
|
|
|
scopes := []string{
|
|
|
|
"offline_access",
|
2023-04-05 08:18:13 +00:00
|
|
|
"all-apis",
|
2023-01-06 15:15:57 +00:00
|
|
|
}
|
2024-03-11 22:24:23 +00:00
|
|
|
endpoints, err := a.oidcEndpoints(ctx)
|
2023-01-06 15:15:57 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, fmt.Errorf("oidc: %w", err)
|
|
|
|
}
|
|
|
|
return &oauth2.Config{
|
|
|
|
ClientID: appClientID,
|
|
|
|
Endpoint: oauth2.Endpoint{
|
|
|
|
AuthURL: endpoints.AuthorizationEndpoint,
|
|
|
|
TokenURL: endpoints.TokenEndpoint,
|
|
|
|
AuthStyle: oauth2.AuthStyleInParams,
|
|
|
|
},
|
|
|
|
RedirectURL: fmt.Sprintf("http://%s", appRedirectAddr),
|
|
|
|
Scopes: scopes,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// key is currently used for two purposes: OIDC URL prefix and token cache key.
|
|
|
|
// once we decide to start storing scopes in the token cache, we should change
|
|
|
|
// this approach.
|
|
|
|
func (a *PersistentAuth) key() string {
|
|
|
|
a.Host = strings.TrimSuffix(a.Host, "/")
|
|
|
|
if !strings.HasPrefix(a.Host, "http") {
|
|
|
|
a.Host = fmt.Sprintf("https://%s", a.Host)
|
|
|
|
}
|
|
|
|
if a.AccountID != "" {
|
|
|
|
return fmt.Sprintf("%s/oidc/accounts/%s", a.Host, a.AccountID)
|
|
|
|
}
|
|
|
|
return a.Host
|
|
|
|
}
|
|
|
|
|
|
|
|
func (a *PersistentAuth) stateAndPKCE() (string, *authhandler.PKCEParams) {
|
|
|
|
verifier := a.randomString(64)
|
|
|
|
verifierSha256 := sha256.Sum256([]byte(verifier))
|
|
|
|
challenge := base64.RawURLEncoding.EncodeToString(verifierSha256[:])
|
|
|
|
return a.randomString(16), &authhandler.PKCEParams{
|
|
|
|
Challenge: challenge,
|
|
|
|
ChallengeMethod: "S256",
|
|
|
|
Verifier: verifier,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (a *PersistentAuth) randomString(size int) string {
|
|
|
|
raw := make([]byte, size)
|
|
|
|
_, _ = rand.Read(raw)
|
|
|
|
return base64.RawURLEncoding.EncodeToString(raw)
|
|
|
|
}
|
|
|
|
|
|
|
|
// oauthAuthorizationServer holds the two endpoints that oidcEndpoints either
// derives for an account or decodes from the JSON response of the
// /oidc/.well-known/oauth-authorization-server request.
type oauthAuthorizationServer struct {
	AuthorizationEndpoint string `json:"authorization_endpoint"` // ../v1/authorize
	TokenEndpoint         string `json:"token_endpoint"`         // ../v1/token
}
|