Compare commits

...

3 Commits

Author SHA1 Message Date
shreyas-goenka 4c550e23da
Merge f6232f5aae into e4d039a1aa 2024-10-17 16:28:07 +05:30
Pieter Noordhuis e4d039a1aa
Handle normalization of `dyn.KindTime` into an any type (#1836)
## Changes

The issue reported in #1828 illustrates how using a YAML timestamp-like
value (a date in this case) causes an issue during conversion to and
from the typed configuration tree.

We use the `AsAny()` function on the `dyn.Value` when normalizing for
the `any` type. We only use the `any` type for variable values, because
they can assume every type. The `AsAny()` function returns a `time.Time`
for the time value during conversion **to** the typed configuration
tree. Upon conversion **from** the typed configuration tree back into
the dynamic configuration tree, we cannot distinguish a `time.Time`
struct from any other struct.

To address this, we use the underlying string value of the time value
when we normalize for the `any` type.

Fixes #1828.

## Tests

Existing unit tests pass
2024-10-17 10:00:40 +00:00
Shreyas Goenka f6232f5aae
[WIP] Add support for non python ipynb notebooks 2024-10-11 16:40:15 +02:00
8 changed files with 237 additions and 47 deletions

View File

@ -0,0 +1,33 @@
bundle:
name: issue_1828
variables:
# One entry for each of the underlying YAML (or [dyn.Kind]) types.
# The test confirms we can convert to and from the typed configuration without losing information.
map:
default:
foo: bar
sequence:
default:
- foo
- bar
string:
default: foo
bool:
default: true
int:
default: 42
float:
default: 3.14
time:
default: 2021-01-01
nil:
default:

View File

@ -0,0 +1,48 @@
package config_tests
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestIssue1828(t *testing.T) {
b := load(t, "./issue_1828")
if assert.Contains(t, b.Config.Variables, "map") {
assert.Equal(t, map[string]any{
"foo": "bar",
}, b.Config.Variables["map"].Default)
}
if assert.Contains(t, b.Config.Variables, "sequence") {
assert.Equal(t, []any{
"foo",
"bar",
}, b.Config.Variables["sequence"].Default)
}
if assert.Contains(t, b.Config.Variables, "string") {
assert.Equal(t, "foo", b.Config.Variables["string"].Default)
}
if assert.Contains(t, b.Config.Variables, "bool") {
assert.Equal(t, true, b.Config.Variables["bool"].Default)
}
if assert.Contains(t, b.Config.Variables, "int") {
assert.Equal(t, 42, b.Config.Variables["int"].Default)
}
if assert.Contains(t, b.Config.Variables, "float") {
assert.Equal(t, 3.14, b.Config.Variables["float"].Default)
}
if assert.Contains(t, b.Config.Variables, "time") {
assert.Equal(t, "2021-01-01", b.Config.Variables["time"].Default)
}
if assert.Contains(t, b.Config.Variables, "nil") {
assert.Equal(t, nil, b.Config.Variables["nil"].Default)
}
}

View File

@ -360,7 +360,7 @@ func TestAccFilerReadDir(t *testing.T) {
}
}
var jupyterNotebookContent1 = `
var pythonJupyterNotebookContent1 = `
{
"cells": [
{
@ -384,7 +384,10 @@ var jupyterNotebookContent1 = `
}
`
var jupyterNotebookContent2 = `
// TODO: Does detect notebook work with notebooks exported from databricks?
// They typically do not have the top level "language" key set.
var pythonJupyterNotebookContent2 = `
{
"cells": [
{
@ -408,6 +411,10 @@ var jupyterNotebookContent2 = `
}
`
// TODO: Continue adding tests for other types of notebooks than python here.
// Exporting from a workspace makes this work easier.
func TestAccFilerWorkspaceNotebookConflict(t *testing.T) {
t.Parallel()
@ -424,7 +431,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) {
require.NoError(t, err)
err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))"))
require.NoError(t, err)
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1))
err = f.Write(ctx, "pythonJupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent1))
require.NoError(t, err)
// Assert contents after initial upload
@ -432,7 +439,7 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) {
filerTest{t, f}.assertContents(ctx, "rNb", "# Databricks notebook source\nprint('first upload'))")
filerTest{t, f}.assertContents(ctx, "sqlNb", "-- Databricks notebook source\n SELECT \"first upload\"")
filerTest{t, f}.assertContents(ctx, "scalaNb", "// Databricks notebook source\n println(\"first upload\"))")
filerTest{t, f}.assertContents(ctx, "jupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")")
filerTest{t, f}.assertContents(ctx, "pythonJupyterNb", "# Databricks notebook source\nprint(\"Jupyter Notebook Version 1\")")
// Assert uploading a second time fails due to overwrite mode missing
err = f.Write(ctx, "pyNb.py", strings.NewReader("# Databricks notebook source\nprint('second upload'))"))
@ -451,9 +458,9 @@ func TestAccFilerWorkspaceNotebookConflict(t *testing.T) {
assert.ErrorIs(t, err, fs.ErrExist)
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/scalaNb$`), err.Error())
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2))
err = f.Write(ctx, "pythonJupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent2))
assert.ErrorIs(t, err, fs.ErrExist)
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/jupyterNb$`), err.Error())
assert.Regexp(t, regexp.MustCompile(`file already exists: .*/pythonJupyterNb$`), err.Error())
}
func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) {
@ -472,7 +479,7 @@ func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) {
require.NoError(t, err)
err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"first upload\"))"))
require.NoError(t, err)
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent1))
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent1))
require.NoError(t, err)
// Assert contents after initial upload
@ -491,7 +498,7 @@ func TestAccFilerWorkspaceNotebookWithOverwriteFlag(t *testing.T) {
require.NoError(t, err)
err = f.Write(ctx, "scalaNb.scala", strings.NewReader("// Databricks notebook source\n println(\"second upload\"))"), filer.OverwriteIfExists)
require.NoError(t, err)
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(jupyterNotebookContent2), filer.OverwriteIfExists)
err = f.Write(ctx, "jupyterNb.ipynb", strings.NewReader(pythonJupyterNotebookContent2), filer.OverwriteIfExists)
require.NoError(t, err)
// Assert contents have been overwritten
@ -515,8 +522,8 @@ func TestAccFilerWorkspaceFilesExtensionsReadDir(t *testing.T) {
{"foo.r", "print('foo')"},
{"foo.scala", "println('foo')"},
{"foo.sql", "SELECT 'foo'"},
{"jupyterNb.ipynb", jupyterNotebookContent1},
{"jupyterNb2.ipynb", jupyterNotebookContent2},
{"jupyterNb.ipynb", pythonJupyterNotebookContent1},
{"jupyterNb2.ipynb", pythonJupyterNotebookContent2},
{"pyNb.py", "# Databricks notebook source\nprint('first upload'))"},
{"rNb.r", "# Databricks notebook source\nprint('first upload'))"},
{"scalaNb.scala", "// Databricks notebook source\n println(\"first upload\"))"},
@ -582,7 +589,7 @@ func setupFilerWithExtensionsTest(t *testing.T) filer.Filer {
}{
{"foo.py", "# Databricks notebook source\nprint('first upload'))"},
{"bar.py", "print('foo')"},
{"jupyter.ipynb", jupyterNotebookContent1},
{"jupyter.ipynb", pythonJupyterNotebookContent1},
{"pretender", "not a notebook"},
{"dir/file.txt", "file content"},
{"scala-notebook.scala", "// Databricks notebook source\nprintln('first upload')"},
@ -756,7 +763,7 @@ func TestAccWorkspaceFilesExtensions_ExportFormatIsPreserved(t *testing.T) {
assert.ErrorIs(t, err, fs.ErrNotExist)
// Case 2: Jupyter Notebook
err = wf.Write(ctx, "bar.ipynb", strings.NewReader(jupyterNotebookContent1))
err = wf.Write(ctx, "bar.ipynb", strings.NewReader(pythonJupyterNotebookContent1))
require.NoError(t, err)
// The Jupyter notebook should exist but not the source notebook

View File

@ -562,12 +562,10 @@ func setupLocalFiler(t *testing.T) (filer.Filer, string) {
}
func setupWsfsFiler(t *testing.T) (filer.Filer, string) {
t.Log(GetEnvOrSkipTest(t, "CLOUD_ENV"))
ctx, w := acc.WorkspaceTest(t)
ctx := context.Background()
w := databricks.Must(databricks.NewWorkspaceClient())
tmpdir := TemporaryWorkspaceDir(t, w)
f, err := filer.NewWorkspaceFilesClient(w, tmpdir)
tmpdir := TemporaryWorkspaceDir(t, w.W)
f, err := filer.NewWorkspaceFilesClient(w.W, tmpdir)
require.NoError(t, err)
// Check if we can use this API here, skip test if we cannot.

View File

@ -398,6 +398,34 @@ func (n normalizeOptions) normalizeFloat(typ reflect.Type, src dyn.Value, path d
return dyn.NewValue(out, src.Locations()), diags
}
func (n normalizeOptions) normalizeInterface(typ reflect.Type, src dyn.Value, path dyn.Path) (dyn.Value, diag.Diagnostics) {
func (n normalizeOptions) normalizeInterface(_ reflect.Type, src dyn.Value, path dyn.Path) (dyn.Value, diag.Diagnostics) {
// Deal with every [dyn.Kind] here to ensure completeness.
switch src.Kind() {
case dyn.KindMap:
// Fall through
case dyn.KindSequence:
// Fall through
case dyn.KindString:
// Fall through
case dyn.KindBool:
// Fall through
case dyn.KindInt:
// Fall through
case dyn.KindFloat:
// Fall through
case dyn.KindTime:
// Conversion of a time value to an interface{}.
// The [dyn.Value.AsAny] equivalent for this kind is the [time.Time] struct.
// If we convert to a typed representation and back again, we cannot distinguish
// a [time.Time] struct from any other struct.
//
// Therefore, we normalize the time value to a string.
return dyn.NewValue(src.MustTime().String(), src.Locations()), nil
case dyn.KindNil:
// Fall through
default:
return dyn.InvalidValue, diag.Errorf("unsupported kind: %s", src.Kind())
}
return src, nil
}

View File

@ -858,23 +858,7 @@ func TestNormalizeAnchors(t *testing.T) {
}, vout.AsAny())
}
func TestNormalizeBoolToAny(t *testing.T) {
var typ any
vin := dyn.NewValue(false, []dyn.Location{{File: "file", Line: 1, Column: 1}})
vout, err := Normalize(&typ, vin)
assert.Len(t, err, 0)
assert.Equal(t, dyn.NewValue(false, []dyn.Location{{File: "file", Line: 1, Column: 1}}), vout)
}
func TestNormalizeIntToAny(t *testing.T) {
var typ any
vin := dyn.NewValue(10, []dyn.Location{{File: "file", Line: 1, Column: 1}})
vout, err := Normalize(&typ, vin)
assert.Len(t, err, 0)
assert.Equal(t, dyn.NewValue(10, []dyn.Location{{File: "file", Line: 1, Column: 1}}), vout)
}
func TestNormalizeSliceToAny(t *testing.T) {
func TestNormalizeAnyFromSlice(t *testing.T) {
var typ any
v1 := dyn.NewValue(1, []dyn.Location{{File: "file", Line: 1, Column: 1}})
v2 := dyn.NewValue(2, []dyn.Location{{File: "file", Line: 1, Column: 1}})
@ -883,3 +867,35 @@ func TestNormalizeSliceToAny(t *testing.T) {
assert.Len(t, err, 0)
assert.Equal(t, dyn.NewValue([]dyn.Value{v1, v2}, []dyn.Location{{File: "file", Line: 1, Column: 1}}), vout)
}
func TestNormalizeAnyFromString(t *testing.T) {
var typ any
vin := dyn.NewValue("string", []dyn.Location{{File: "file", Line: 1, Column: 1}})
vout, err := Normalize(&typ, vin)
assert.Len(t, err, 0)
assert.Equal(t, dyn.NewValue("string", []dyn.Location{{File: "file", Line: 1, Column: 1}}), vout)
}
func TestNormalizeAnyFromBool(t *testing.T) {
var typ any
vin := dyn.NewValue(false, []dyn.Location{{File: "file", Line: 1, Column: 1}})
vout, err := Normalize(&typ, vin)
assert.Len(t, err, 0)
assert.Equal(t, dyn.NewValue(false, []dyn.Location{{File: "file", Line: 1, Column: 1}}), vout)
}
func TestNormalizeAnyFromInt(t *testing.T) {
var typ any
vin := dyn.NewValue(10, []dyn.Location{{File: "file", Line: 1, Column: 1}})
vout, err := Normalize(&typ, vin)
assert.Len(t, err, 0)
assert.Equal(t, dyn.NewValue(10, []dyn.Location{{File: "file", Line: 1, Column: 1}}), vout)
}
func TestNormalizeAnyFromTime(t *testing.T) {
var typ any
vin := dyn.NewValue(dyn.MustTime("2024-08-29"), []dyn.Location{{File: "file", Line: 1, Column: 1}})
vout, err := Normalize(&typ, vin)
assert.Empty(t, err)
assert.Equal(t, dyn.NewValue("2024-08-29", vin.Locations()), vout)
}

View File

@ -28,6 +28,9 @@ var extensionsToLanguages = map[string]workspace.Language{
".r": workspace.LanguageR,
".scala": workspace.LanguageScala,
".sql": workspace.LanguageSql,
// The platform supports all languages (Python, R, Scala, and SQL) for Jupyter notebooks.
// Thus, we do not need to check the language for .ipynb files.
".ipynb": workspace.LanguagePython,
}
@ -47,6 +50,10 @@ func (w *workspaceFilesExtensionsClient) stat(ctx context.Context, name string)
return info.(wsfsFileInfo), err
}
// TODO: Add end to end tests that the filer works for all .ipynb cases.
// TODO: Also fix the sync issues. OR add tests that sync works fine with non
// python notebooks. Is this needed in the first place?
// This function returns the stat for the provided notebook. The stat object itself contains the path
// with the extension since it is meant to be used in the context of a fs.FileInfo.
func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx context.Context, name string) (*workspaceFileStatus, error) {
@ -75,8 +82,9 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithExt(ctx contex
return nil, nil
}
// Not the correct language. Return early.
if stat.Language != extensionsToLanguages[ext] {
// Not the correct language. Return early. Note: All languages are supported
// for Jupyter notebooks.
if ext != ".ipynb" && stat.Language != extensionsToLanguages[ext] {
log.Debugf(ctx, "attempting to determine if %s could be a notebook. Found a notebook at %s but it is not of the correct language. Expected %s but found %s.", name, path.Join(w.root, nameWithoutExt), extensionsToLanguages[ext], stat.Language)
return nil, nil
}
@ -120,7 +128,8 @@ func (w *workspaceFilesExtensionsClient) getNotebookStatByNameWithoutExt(ctx con
ext := notebook.GetExtensionByLanguage(&stat.ObjectInfo)
// If the notebook was exported as a Jupyter notebook, the extension should be .ipynb.
if stat.Language == workspace.LanguagePython && stat.ReposExportFormat == workspace.ExportFormatJupyter {
// TODO: Test this.
if stat.ReposExportFormat == workspace.ExportFormatJupyter {
ext = ".ipynb"
}

View File

@ -44,15 +44,6 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) {
filePath: "/dir/foo.py",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.py resolve to the same name /foo.py. Changing the name of one of these objects will resolve this issue",
},
{
name: "python jupyter notebook and file",
language: workspace.LanguagePython,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.py",
// Jupyter notebooks would correspond to foo.ipynb so an error is not expected.
expectedError: "",
},
{
name: "scala source notebook and file",
language: workspace.LanguageScala,
@ -82,6 +73,66 @@ func TestFilerWorkspaceFilesExtensionsErrorsOnDupName(t *testing.T) {
language: workspace.LanguagePython,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.py",
// Jupyter notebooks would correspond to foo.ipynb so an error is not expected.
expectedError: "",
},
{
name: "scala jupyter notebook and file",
language: workspace.LanguageScala,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.scala",
// Jupyter notebooks would correspond to foo.ipynb so an error is not expected.
expectedError: "",
},
{
name: "sql jupyter notebook and file",
language: workspace.LanguageSql,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.sql",
// Jupyter notebooks would correspond to foo.ipynb so an error is not expected.
expectedError: "",
},
{
name: "r jupyter notebook and file",
language: workspace.LanguageR,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.sql",
// Jupyter notebooks would correspond to foo.ipynb so an error is not expected.
expectedError: "",
},
{
name: "python jupyter notebook and file",
language: workspace.LanguagePython,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.ipynb",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
},
{
name: "scala jupyter notebook and file",
language: workspace.LanguageScala,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.ipynb",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
},
{
name: "r jupyter notebook and file",
language: workspace.LanguageR,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.ipynb",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
},
{
name: "sql jupyter notebook and file",
language: workspace.LanguageSql,
notebookExportFormat: workspace.ExportFormatJupyter,
notebookPath: "/dir/foo",
filePath: "/dir/foo.ipynb",
expectedError: "failed to read files from the workspace file system. Duplicate paths encountered. Both NOTEBOOK at /dir/foo and FILE at /dir/foo.ipynb resolve to the same name /foo.ipynb. Changing the name of one of these objects will resolve this issue",
},