Skip to content

Commit

Permalink
fix: schema parser improvements for complex CRD patterns
Browse files Browse the repository at this point in the history
This patch fixes schema validation and expression extraction for CRDs that
define schemas like `map[string]struct{properties any}` using a combination
of `additionalProperties` and `x-kubernetes-preserve-unknown-fields`
vendor extension.

this commit improves structured resource parsing to:
- safely handle empty/nil schema types with proper length checks
- Support `x-kubernetes-preserve-unknown-fields` at any schema level by validating
  extensions before type checking
- Support patterns like `map[string]struct{Properties any}`

Example of schemas we parse correctly now:
```yaml
  resources:
    additionalProperties:  # map[string]->struct
      properties:
        properties:       # any
          x-kubernetes-preserve-unknown-fields: true
```

links:
- https://github.com/pulumi/pulumi-kubernetes-operator/blob/master/operator/api/pulumi/v1/program_types.go#L64-L82
- https://github.com/pulumi/pulumi-kubernetes-operator/blob/master/operator/config/crd/bases/pulumi.com_programs.yaml
  • Loading branch information
a-hilaly committed Feb 2, 2025
1 parent 45adaf5 commit f582124
Show file tree
Hide file tree
Showing 2 changed files with 243 additions and 23 deletions.
57 changes: 34 additions & 23 deletions pkg/graph/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,15 +68,14 @@ func parseResource(resource interface{}, schema *spec.Schema, path string) ([]va
}

func getExpectedTypes(schema *spec.Schema) ([]string, error) {
// Handle "x-kubernetes-preserve-unknown-fields" extension first
if hasStructuralSchemaMarkerEnabled(schema, xKubernetesPreserveUnknownFields) {
return []string{schemaTypeAny}, nil
}

// Handle "x-kubernetes-int-or-string" extension
if ext, ok := schema.VendorExtensible.Extensions[xKubernetesIntOrString]; ok {
enabled, ok := ext.(bool)
if !ok {
return nil, fmt.Errorf("xKubernetesIntOrString extension is not a boolean")
}
if enabled {
return []string{"string", "integer"}, nil
}
if hasStructuralSchemaMarkerEnabled(schema, xKubernetesIntOrString) {
return []string{"string", "integer"}, nil
}

// Handle OneOf schemas
Expand Down Expand Up @@ -110,9 +109,10 @@ func getExpectedTypes(schema *spec.Schema) ([]string, error) {
return types, nil
}

if schema.Type[0] != "" {
if len(schema.Type) > 0 && schema.Type[0] != "" {
return schema.Type, nil
}

if schema.AdditionalProperties != nil && schema.AdditionalProperties.Allows {
// NOTE(a-hilaly): I don't like the type "any", we might want to change this to "object"
// in the future; just haven't really thought about it yet.
Expand All @@ -123,14 +123,15 @@ func getExpectedTypes(schema *spec.Schema) ([]string, error) {
return nil, fmt.Errorf("unknown schema type")
}

func sliceInclude(expectedTypes []string, expectedType string) bool {
return slices.Contains(expectedTypes, expectedType)
}

func validateSchema(schema *spec.Schema, path string) error {
if schema == nil {
return fmt.Errorf("schema is nil for path %s", path)
}

if hasStructuralSchemaMarkerEnabled(schema, xKubernetesPreserveUnknownFields) {
return nil
}

// Ensure the schema has at least one valid construct
if len(schema.Type) == 0 && len(schema.OneOf) == 0 && len(schema.AnyOf) == 0 && schema.AdditionalProperties == nil {
return fmt.Errorf("schema at path %s has no valid type, OneOf, AnyOf, or AdditionalProperties", path)
Expand All @@ -139,18 +140,14 @@ func validateSchema(schema *spec.Schema, path string) error {
}

func parseObject(field map[string]interface{}, schema *spec.Schema, path string, expectedTypes []string) ([]variable.FieldDescriptor, error) {
if !sliceInclude(expectedTypes, "object") && (schema.AdditionalProperties == nil || !schema.AdditionalProperties.Allows) {
return nil, fmt.Errorf("expected object type or AdditionalProperties allowed for path %s, got %v", path, field)
}

// Look for vendor schema extensions first
if len(schema.VendorExtensible.Extensions) > 0 {
// If the schema has the x-kubernetes-preserve-unknown-fields extension, we need to parse
// this field using the schemaless parser. This allows us to extract CEL expressions from
// fields that don't have a strict schema definition, while still preserving any unknown
// fields. This is particularly important for handling custom resources and fields that
// may contain arbitrary nested structures with potential CEL expressions.
if enabled, ok := schema.VendorExtensible.Extensions[xKubernetesPreserveUnknownFields]; ok && enabled.(bool) {
if hasStructuralSchemaMarkerEnabled(schema, xKubernetesPreserveUnknownFields) {
expressions, err := parseSchemalessResource(field, path)
if err != nil {
return nil, err
Expand All @@ -159,6 +156,10 @@ func parseObject(field map[string]interface{}, schema *spec.Schema, path string,
}
}

if !slices.Contains(expectedTypes, "object") && (schema.AdditionalProperties == nil || !schema.AdditionalProperties.Allows) {
return nil, fmt.Errorf("expected object type or AdditionalProperties allowed for path %s, got %v", path, field)
}

var expressionsFields []variable.FieldDescriptor
for fieldName, value := range field {
fieldSchema, err := getFieldSchema(schema, fieldName)
Expand All @@ -176,7 +177,7 @@ func parseObject(field map[string]interface{}, schema *spec.Schema, path string,
}

func parseArray(field []interface{}, schema *spec.Schema, path string, expectedTypes []string) ([]variable.FieldDescriptor, error) {
if !sliceInclude(expectedTypes, "array") {
if !slices.Contains(expectedTypes, "array") {
return nil, fmt.Errorf("expected array type for path %s, got %v", path, field)
}

Expand Down Expand Up @@ -213,7 +214,7 @@ func parseString(field string, schema *spec.Schema, path string, expectedTypes [
}}, nil
}

if !sliceInclude(expectedTypes, "string") && !sliceInclude(expectedTypes, schemaTypeAny) {
if !slices.Contains(expectedTypes, "string") && !slices.Contains(expectedTypes, schemaTypeAny) {
return nil, fmt.Errorf("expected string type or AdditionalProperties for path %s, got %v", path, field)
}

Expand All @@ -234,15 +235,15 @@ func parseString(field string, schema *spec.Schema, path string, expectedTypes [
func parseScalarTypes(field interface{}, _ *spec.Schema, path string, expectedTypes []string) ([]variable.FieldDescriptor, error) {
// perform type checks for scalar types
switch {
case sliceInclude(expectedTypes, "number"):
case slices.Contains(expectedTypes, "number"):
if _, ok := field.(float64); !ok {
return nil, fmt.Errorf("expected number type for path %s, got %T", path, field)
}
case sliceInclude(expectedTypes, "int"), sliceInclude(expectedTypes, "integer"):
case slices.Contains(expectedTypes, "int"), slices.Contains(expectedTypes, "integer"):
if !isInteger(field) {
return nil, fmt.Errorf("expected integer type for path %s, got %T", path, field)
}
case sliceInclude(expectedTypes, "boolean"), sliceInclude(expectedTypes, "bool"):
case slices.Contains(expectedTypes, "boolean"), slices.Contains(expectedTypes, "bool"):
if _, ok := field.(bool); !ok {
return nil, fmt.Errorf("expected boolean type for path %s, got %T", path, field)
}
Expand Down Expand Up @@ -306,3 +307,13 @@ func joinPathAndFieldName(path, fieldName string) string {
}
return fmt.Sprintf("%s.%s", path, fieldName)
}

// hasStructuralSchemaMarkerEnabled checks if a schema has a specific marker enabled.
func hasStructuralSchemaMarkerEnabled(schema *spec.Schema, marker string) bool {
if ext, ok := schema.VendorExtensible.Extensions[marker]; ok {
if enabled, ok := ext.(bool); ok && enabled {
return true
}
}
return false
}
209 changes: 209 additions & 0 deletions pkg/graph/parser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1286,3 +1286,212 @@ func TestOneOfWithStructuralConstraints(t *testing.T) {
}
})
}

func TestPreserveUnknownFields(t *testing.T) {
testCases := []struct {
name string
schema *spec.Schema
resource map[string]interface{}
wantErr bool
expectedError string
expectedExpressions []variable.FieldDescriptor
}{
{
name: "schema with no type but x-kubernetes-preserve-unknown-fields",
schema: &spec.Schema{
VendorExtensible: spec.VendorExtensible{
Extensions: spec.Extensions{
"x-kubernetes-preserve-unknown-fields": true,
},
},
},
resource: map[string]interface{}{
"spec": map[string]interface{}{
"template": "${template.value}",
},
},
wantErr: false,
expectedExpressions: []variable.FieldDescriptor{
{
Path: "spec.template",
Expressions: []string{"template.value"},
ExpectedTypes: []string{"any"},
StandaloneExpression: true,
},
},
},
{
name: "schema with no type but x-kubernetes-preserve-unknown-fields, expression in nested object",
schema: &spec.Schema{
VendorExtensible: spec.VendorExtensible{
Extensions: spec.Extensions{
"x-kubernetes-preserve-unknown-fields": true,
},
},
},
resource: map[string]interface{}{
"spec": map[string]interface{}{
"field1": "noisy string",
"template": map[string]interface{}{
"nested": []interface{}{
map[string]interface{}{
"key": "${template.value}",
},
},
},
},
},
wantErr: false,
expectedExpressions: []variable.FieldDescriptor{
{
Path: "spec.template.nested[0].key",
Expressions: []string{"template.value"},
ExpectedTypes: []string{"any"},
StandaloneExpression: true,
},
},
},
{
name: "pulumi-style mixed schema",
schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Type: []string{"object"},
Properties: map[string]spec.Schema{
"program": {
SchemaProps: spec.SchemaProps{
Type: []string{"object"},
Properties: map[string]spec.Schema{
"resources": {
SchemaProps: spec.SchemaProps{
Type: []string{"object"},
AdditionalProperties: &spec.SchemaOrBool{
Allows: true,
Schema: &spec.Schema{
SchemaProps: spec.SchemaProps{
Type: []string{"object"},
Properties: map[string]spec.Schema{
"properties": {
VendorExtensible: spec.VendorExtensible{
Extensions: spec.Extensions{
"x-kubernetes-preserve-unknown-fields": true,
},
},
},
},
},
},
},
},
},
},
},
},
},
},
},
resource: map[string]interface{}{
"program": map[string]interface{}{
"resources": map[string]interface{}{
"app": map[string]interface{}{
"properties": map[string]interface{}{
"spec": map[string]interface{}{
"name": "${schema.spec.name}",
"region": "${schema.spec.region}",
"services": []interface{}{
map[string]interface{}{
"name": "${schema.spec.name}-service",
"instanceCount": "${schema.spec.instanceCount}",
},
},
},
},
},
},
},
},
wantErr: false,
expectedExpressions: []variable.FieldDescriptor{
{
Path: "program.resources.app.properties.spec.name",
Expressions: []string{"schema.spec.name"},
ExpectedTypes: []string{"any"},
StandaloneExpression: true,
},
{
Path: "program.resources.app.properties.spec.region",
Expressions: []string{"schema.spec.region"},
ExpectedTypes: []string{"any"},
StandaloneExpression: true,
},
{
Path: "program.resources.app.properties.spec.services[0].name",
Expressions: []string{"schema.spec.name"},
ExpectedTypes: []string{"any"},
StandaloneExpression: false,
},
{
Path: "program.resources.app.properties.spec.services[0].instanceCount",
Expressions: []string{"schema.spec.instanceCount"},
ExpectedTypes: []string{"any"},
StandaloneExpression: true,
},
},
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
expressions, err := ParseResource(tc.resource, tc.schema)
if tc.wantErr {
if err == nil {
t.Error("Expected error but got none")
} else if tc.expectedError != "" && err.Error() != tc.expectedError {
t.Errorf("Expected error message %q, got %q", tc.expectedError, err.Error())
}
} else {
if err != nil {
t.Errorf("Did not expect error but got: %v", err)
return
}

if len(expressions) != len(tc.expectedExpressions) {
t.Errorf("Expected %d expressions, got %d", len(tc.expectedExpressions), len(expressions))
t.Errorf("Got expressions:")
for _, expr := range expressions {
t.Errorf(" %+v", expr)
}
return
}

// Create maps for easier comparison
actualMap := make(map[string]variable.FieldDescriptor)
expectedMap := make(map[string]variable.FieldDescriptor)

for _, expr := range expressions {
actualMap[expr.Path] = expr
}
for _, expr := range tc.expectedExpressions {
expectedMap[expr.Path] = expr
}

for path, expectedExpr := range expectedMap {
actualExpr, ok := actualMap[path]
if !ok {
t.Errorf("Missing expected expression for path %s", path)
continue
}

if !reflect.DeepEqual(actualExpr.Expressions, expectedExpr.Expressions) {
t.Errorf("Path %s: expected expressions %v, got %v", path, expectedExpr.Expressions, actualExpr.Expressions)
}
if !reflect.DeepEqual(actualExpr.ExpectedTypes, expectedExpr.ExpectedTypes) {
t.Errorf("Path %s: expected types %v, got %v", path, expectedExpr.ExpectedTypes, actualExpr.ExpectedTypes)
}
if actualExpr.StandaloneExpression != expectedExpr.StandaloneExpression {
t.Errorf("Path %s: expected StandaloneExpression %v, got %v", path, expectedExpr.StandaloneExpression, actualExpr.StandaloneExpression)
}
}
}
})
}
}

0 comments on commit f582124

Please sign in to comment.