From 757d5efe8dd193d7cef792a4524481f136f20677 Mon Sep 17 00:00:00 2001 From: shreyas-goenka <88374338+shreyas-goenka@users.noreply.github.com> Date: Mon, 25 Sep 2023 11:53:38 +0200 Subject: [PATCH] Add support for regex patterns in template schema (#768) ## Changes This PR introduces support for regex pattern validation in our custom jsonschema validator. This allows us to fail early if a user enters an invalid value for a field. For example, now this is what initializing the default template looks like with an invalid project name: ``` shreyas.goenka@THW32HFW6T bricks % cli bundle init Template to use [default-python]: Unique name for this project [my_project]: (_*_) Error: invalid value for project_name: (_*_). Must consist of letter and underscores only. ``` ## Tests New unit tests and manually. --- libs/jsonschema/extension.go | 4 + libs/jsonschema/instance.go | 12 +++ libs/jsonschema/instance_test.go | 40 +++++++++ libs/jsonschema/schema.go | 38 +++++++++ libs/jsonschema/schema_test.go | 83 +++++++++++++++++++ ...st-schema-pattern-with-custom-message.json | 9 ++ .../test-schema-pattern.json | 8 ++ libs/jsonschema/utils.go | 30 +++++++ libs/jsonschema/utils_test.go | 37 +++++++++ libs/template/config.go | 5 ++ .../databricks_template_schema.json | 4 +- .../default-python/template/__preamble.tmpl | 7 -- 12 files changed, 269 insertions(+), 8 deletions(-) create mode 100644 libs/jsonschema/testdata/instance-validate/test-schema-pattern-with-custom-message.json create mode 100644 libs/jsonschema/testdata/instance-validate/test-schema-pattern.json diff --git a/libs/jsonschema/extension.go b/libs/jsonschema/extension.go index bbbde695..57f3e873 100644 --- a/libs/jsonschema/extension.go +++ b/libs/jsonschema/extension.go @@ -11,4 +11,8 @@ type Extension struct { // If not defined, the field is ordered alphabetically after all fields // that do have an order defined. Order *int `json:"order,omitempty"` + + // PatternMatchFailureMessage is a user defined message that is displayed to the + // user if a JSON schema pattern match fails. + PatternMatchFailureMessage string `json:"pattern_match_failure_message,omitempty"` } diff --git a/libs/jsonschema/instance.go b/libs/jsonschema/instance.go index 229a45b5..6b3e3af4 100644 --- a/libs/jsonschema/instance.go +++ b/libs/jsonschema/instance.go @@ -45,6 +45,7 @@ func (s *Schema) ValidateInstance(instance map[string]any) error { s.validateEnum, s.validateRequired, s.validateTypes, + s.validatePattern, } { err := fn(instance) if err != nil { @@ -111,3 +112,14 @@ func (s *Schema) validateEnum(instance map[string]any) error { } return nil } + +func (s *Schema) validatePattern(instance map[string]any) error { + for k, v := range instance { + fieldInfo, ok := s.Properties[k] + if !ok { + continue + } + return ValidatePatternMatch(k, v, fieldInfo) + } + return nil +} diff --git a/libs/jsonschema/instance_test.go b/libs/jsonschema/instance_test.go index ffd10ca4..3a357d71 100644 --- a/libs/jsonschema/instance_test.go +++ b/libs/jsonschema/instance_test.go @@ -153,3 +153,43 @@ func TestValidateInstanceEnum(t *testing.T) { assert.EqualError(t, schema.validateEnum(invalidIntInstance), "expected value of property bar to be one of [2 4 6]. Found: 1") assert.EqualError(t, schema.ValidateInstance(invalidIntInstance), "expected value of property bar to be one of [2 4 6]. Found: 1") } + +func TestValidateInstancePattern(t *testing.T) { + schema, err := Load("./testdata/instance-validate/test-schema-pattern.json") + require.NoError(t, err) + + validInstance := map[string]any{ + "foo": "axyzc", + } + assert.NoError(t, schema.validatePattern(validInstance)) + assert.NoError(t, schema.ValidateInstance(validInstance)) + + invalidInstanceValue := map[string]any{ + "foo": "xyz", + } + assert.EqualError(t, schema.validatePattern(invalidInstanceValue), "invalid value for foo: \"xyz\". Expected to match regex pattern: a.*c") + assert.EqualError(t, schema.ValidateInstance(invalidInstanceValue), "invalid value for foo: \"xyz\". Expected to match regex pattern: a.*c") + + invalidInstanceType := map[string]any{ + "foo": 1, + } + assert.EqualError(t, schema.validatePattern(invalidInstanceType), "invalid value for foo: 1. Expected a value of type string") + assert.EqualError(t, schema.ValidateInstance(invalidInstanceType), "incorrect type for property foo: expected type string, but value is 1") +} + +func TestValidateInstancePatternWithCustomMessage(t *testing.T) { + schema, err := Load("./testdata/instance-validate/test-schema-pattern-with-custom-message.json") + require.NoError(t, err) + + validInstance := map[string]any{ + "foo": "axyzc", + } + assert.NoError(t, schema.validatePattern(validInstance)) + assert.NoError(t, schema.ValidateInstance(validInstance)) + + invalidInstanceValue := map[string]any{ + "foo": "xyz", + } + assert.EqualError(t, schema.validatePattern(invalidInstanceValue), "invalid value for foo: \"xyz\". Please enter a string starting with 'a' and ending with 'c'") + assert.EqualError(t, schema.ValidateInstance(invalidInstanceValue), "invalid value for foo: \"xyz\". Please enter a string starting with 'a' and ending with 'c'") +} diff --git a/libs/jsonschema/schema.go b/libs/jsonschema/schema.go index 108102a6..dc319bfe 100644 --- a/libs/jsonschema/schema.go +++ b/libs/jsonschema/schema.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "os" + "regexp" "slices" ) @@ -45,6 +46,11 @@ type Schema struct { // List of valid values for a JSON instance for this schema. Enum []any `json:"enum,omitempty"` + // A pattern is a regular expression the object will be validated against. + // Can only be used with type "string". The regex syntax supported is available + // here: https://github.com/google/re2/wiki/Syntax + Pattern string `json:"pattern,omitempty"` + // Extension embeds our custom JSON schema extensions. Extension } @@ -112,6 +118,38 @@ func (schema *Schema) validate() error { return fmt.Errorf("list of enum values for property %s does not contain default value %v: %v", name, property.Default, property.Enum) } } + + // Validate usage of "pattern" is consistent. + for name, property := range schema.Properties { + pattern := property.Pattern + if pattern == "" { + continue + } + + // validate property type is string + if property.Type != StringType { + return fmt.Errorf("property %q has a non-empty regex pattern %q specified. Patterns are only supported for string properties", name, pattern) + } + + // validate regex pattern syntax + r, err := regexp.Compile(pattern) + if err != nil { + return fmt.Errorf("invalid regex pattern %q provided for property %q: %w", pattern, name, err) + } + + // validate default value against the pattern + if property.Default != nil && !r.MatchString(property.Default.(string)) { + return fmt.Errorf("default value %q for property %q does not match specified regex pattern: %q", property.Default, name, pattern) + } + + // validate enum values against the pattern + for i, enum := range property.Enum { + if !r.MatchString(enum.(string)) { + return fmt.Errorf("enum value %q at index %v for property %q does not match specified regex pattern: %q", enum, i, name, pattern) + } + } + } + return nil } diff --git a/libs/jsonschema/schema_test.go b/libs/jsonschema/schema_test.go index db559ea8..aff2d962 100644 --- a/libs/jsonschema/schema_test.go +++ b/libs/jsonschema/schema_test.go @@ -139,3 +139,86 @@ func TestSchemaValidateErrorWhenDefaultValueIsNotInEnums(t *testing.T) { err = validSchema.validate() assert.NoError(t, err) } + +func TestSchemaValidatePatternType(t *testing.T) { + s := &Schema{ + Properties: map[string]*Schema{ + "foo": { + Type: "number", + Pattern: "abc", + }, + }, + } + assert.EqualError(t, s.validate(), "property \"foo\" has a non-empty regex pattern \"abc\" specified. Patterns are only supported for string properties") + + s = &Schema{ + Properties: map[string]*Schema{ + "foo": { + Type: "string", + Pattern: "abc", + }, + }, + } + assert.NoError(t, s.validate()) +} + +func TestSchemaValidateIncorrectRegex(t *testing.T) { + s := &Schema{ + Properties: map[string]*Schema{ + "foo": { + Type: "string", + // invalid regex, missing the closing brace + Pattern: "(abc", + }, + }, + } + assert.EqualError(t, s.validate(), "invalid regex pattern \"(abc\" provided for property \"foo\": error parsing regexp: missing closing ): `(abc`") +} + +func TestSchemaValidatePatternDefault(t *testing.T) { + s := &Schema{ + Properties: map[string]*Schema{ + "foo": { + Type: "string", + Pattern: "abc", + Default: "def", + }, + }, + } + assert.EqualError(t, s.validate(), "default value \"def\" for property \"foo\" does not match specified regex pattern: \"abc\"") + + s = &Schema{ + Properties: map[string]*Schema{ + "foo": { + Type: "string", + Pattern: "a.*d", + Default: "axyzd", + }, + }, + } + assert.NoError(t, s.validate()) +} + +func TestSchemaValidatePatternEnum(t *testing.T) { + s := &Schema{ + Properties: map[string]*Schema{ + "foo": { + Type: "string", + Pattern: "a.*c", + Enum: []any{"abc", "def", "abbc"}, + }, + }, + } + assert.EqualError(t, s.validate(), "enum value \"def\" at index 1 for property \"foo\" does not match specified regex pattern: \"a.*c\"") + + s = &Schema{ + Properties: map[string]*Schema{ + "foo": { + Type: "string", + Pattern: "a.*d", + Enum: []any{"abd", "axybgd", "abbd"}, + }, + }, + } + assert.NoError(t, s.validate()) +} diff --git a/libs/jsonschema/testdata/instance-validate/test-schema-pattern-with-custom-message.json b/libs/jsonschema/testdata/instance-validate/test-schema-pattern-with-custom-message.json new file mode 100644 index 00000000..29296d06 --- /dev/null +++ b/libs/jsonschema/testdata/instance-validate/test-schema-pattern-with-custom-message.json @@ -0,0 +1,9 @@ +{ + "properties": { + "foo": { + "type": "string", + "pattern": "a.*c", + "pattern_match_failure_message": "Please enter a string starting with 'a' and ending with 'c'" + } + } +} diff --git a/libs/jsonschema/testdata/instance-validate/test-schema-pattern.json b/libs/jsonschema/testdata/instance-validate/test-schema-pattern.json new file mode 100644 index 00000000..e7b49d15 --- /dev/null +++ b/libs/jsonschema/testdata/instance-validate/test-schema-pattern.json @@ -0,0 +1,8 @@ +{ + "properties": { + "foo": { + "type": "string", + "pattern": "a.*c" + } + } +} diff --git a/libs/jsonschema/utils.go b/libs/jsonschema/utils.go index 66db9603..7bb666c7 100644 --- a/libs/jsonschema/utils.go +++ b/libs/jsonschema/utils.go @@ -3,6 +3,7 @@ package jsonschema import ( "errors" "fmt" + "regexp" "strconv" ) @@ -111,3 +112,32 @@ func FromString(s string, T Type) (any, error) { } return v, err } + +func ValidatePatternMatch(name string, value any, propertySchema *Schema) error { + if propertySchema.Pattern == "" { + // Return early if no pattern is specified + return nil + } + + // Expect type of value to be a string + stringValue, ok := value.(string) + if !ok { + return fmt.Errorf("invalid value for %s: %v. Expected a value of type string", name, value) + } + + match, err := regexp.MatchString(propertySchema.Pattern, stringValue) + if err != nil { + return err + } + if match { + // successful match + return nil + } + + // If custom user error message is defined, return error with the custom message + msg := propertySchema.PatternMatchFailureMessage + if msg == "" { + msg = fmt.Sprintf("Expected to match regex pattern: %s", propertySchema.Pattern) + } + return fmt.Errorf("invalid value for %s: %q. %s", name, value, msg) +} diff --git a/libs/jsonschema/utils_test.go b/libs/jsonschema/utils_test.go index 29529aaa..4c43e57d 100644 --- a/libs/jsonschema/utils_test.go +++ b/libs/jsonschema/utils_test.go @@ -128,3 +128,40 @@ func TestTemplateToStringSlice(t *testing.T) { assert.NoError(t, err) assert.Equal(t, []string{"1.1", "2.2", "3.3"}, s) } + +func TestValidatePropertyPatternMatch(t *testing.T) { + var err error + + // Expect no error if no pattern is specified. + err = ValidatePatternMatch("foo", 1, &Schema{Type: "integer"}) + assert.NoError(t, err) + + // Expect error because value is not a string. + err = ValidatePatternMatch("bar", 1, &Schema{Type: "integer", Pattern: "abc"}) + assert.EqualError(t, err, "invalid value for bar: 1. Expected a value of type string") + + // Expect error because the pattern is invalid. + err = ValidatePatternMatch("bar", "xyz", &Schema{Type: "string", Pattern: "(abc"}) + assert.EqualError(t, err, "error parsing regexp: missing closing ): `(abc`") + + // Expect no error because the pattern matches. + err = ValidatePatternMatch("bar", "axyzd", &Schema{Type: "string", Pattern: "(a*.d)"}) + assert.NoError(t, err) + + // Expect custom error message on match fail + err = ValidatePatternMatch("bar", "axyze", &Schema{ + Type: "string", + Pattern: "(a*.d)", + Extension: Extension{ + PatternMatchFailureMessage: "my custom msg", + }, + }) + assert.EqualError(t, err, "invalid value for bar: \"axyze\". my custom msg") + + // Expect generic message on match fail + err = ValidatePatternMatch("bar", "axyze", &Schema{ + Type: "string", + Pattern: "(a*.d)", + }) + assert.EqualError(t, err, "invalid value for bar: \"axyze\". Expected to match regex pattern: (a*.d)") +} diff --git a/libs/template/config.go b/libs/template/config.go index 21618ac9..2062f320 100644 --- a/libs/template/config.go +++ b/libs/template/config.go @@ -121,6 +121,11 @@ func (c *config) promptForValues() error { } + // Validate the property matches any specified regex pattern. + if err := jsonschema.ValidatePatternMatch(name, userInput, property); err != nil { + return err + } + // Convert user input string back to a value c.values[name], err = jsonschema.FromString(userInput, property.Type) if err != nil { diff --git a/libs/template/templates/default-python/databricks_template_schema.json b/libs/template/templates/default-python/databricks_template_schema.json index db8adcce..8784841e 100644 --- a/libs/template/templates/default-python/databricks_template_schema.json +++ b/libs/template/templates/default-python/databricks_template_schema.json @@ -4,7 +4,9 @@ "type": "string", "default": "my_project", "description": "Unique name for this project", - "order": 1 + "order": 1, + "pattern": "^[A-Za-z0-9_]*$", + "pattern_match_failure_message": "Must consist of letter and underscores only." }, "include_notebook": { "type": "string", diff --git a/libs/template/templates/default-python/template/__preamble.tmpl b/libs/template/templates/default-python/template/__preamble.tmpl index a86d3bff..54732493 100644 --- a/libs/template/templates/default-python/template/__preamble.tmpl +++ b/libs/template/templates/default-python/template/__preamble.tmpl @@ -4,13 +4,6 @@ This file only template directives; it is skipped for the actual output. {{skip "__preamble"}} -{{ $value := .project_name }} -{{with (regexp "^[A-Za-z0-9_]*$")}} - {{if not (.MatchString $value)}} - {{fail "Invalid project_name: %s. Must consist of letter and underscores only." $value}} - {{end}} -{{end}} - {{$notDLT := not (eq .include_dlt "yes")}} {{$notNotebook := not (eq .include_notebook "yes")}} {{$notPython := not (eq .include_python "yes")}}