Skip to content

Commit

Permalink
chore: sanitize transformation error message to hide record values (#221
Browse files Browse the repository at this point in the history
)

Co-authored-by: Ben Church <[email protected]>
  • Loading branch information
ChristoGrab and bnchrch authored Jan 16, 2025
1 parent c55fbbe commit b5ed82c
Show file tree
Hide file tree
Showing 2 changed files with 84 additions and 13 deletions.
32 changes: 29 additions & 3 deletions airbyte_cdk/sources/utils/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from jsonschema import Draft7Validator, RefResolver, ValidationError, Validator, validators

MAX_NESTING_DEPTH = 3
json_to_python_simple = {
"string": str,
"number": float,
Expand Down Expand Up @@ -225,6 +226,31 @@ def transform(
logger.warning(self.get_error_message(e))

def get_error_message(self, e: ValidationError) -> str:
    """
    Construct a sanitized error message from a ValidationError instance.

    The message describes the offending value only by its type structure
    (as produced by ``_get_type_structure``) and never includes the value
    itself, so record contents are not written to the logs.

    :param e: the validation error; its ``path``, ``instance`` and
        ``validator_value`` attributes are read.
    :return: a human-readable message naming the source type structure, the
        target type and the dotted key path (empty for the root value).
    """
    # Dotted path to the failing field, e.g. "value.0"; "" when the root failed.
    field_path = ".".join(map(str, e.path))
    # Type names only -- deliberately not repr(e.instance), which would leak data.
    type_structure = self._get_type_structure(e.instance)

    return f"Failed to transform value from type '{type_structure}' to type '{e.validator_value}' at path: '{field_path}'"

def _get_type_structure(self, input_data: Any, current_depth: int = 0) -> Any:
    """
    Get the structure of a given input data for use in error message construction.

    Only type names are reported, never the values themselves. Dictionaries
    are expanded key by key until ``MAX_NESTING_DEPTH`` levels have been
    descended; a dictionary found at that depth is collapsed to ``"object"``.

    :param input_data: the value to describe.
    :param current_depth: how many dictionary levels have already been
        descended; callers normally leave this at its default of 0.
    :return: ``"null"`` for ``None``; a dict mapping each key to the
        structure of its value for dictionaries above the depth limit;
        otherwise the type name looked up in ``python_to_json``, falling
        back to the Python type name when the type has no entry there.
    """
    # Handle null values
    if input_data is None:
        return "null"

    if isinstance(input_data, dict):
        # Avoid recursing too deep: collapse objects at the depth limit.
        if current_depth >= MAX_NESTING_DEPTH:
            return "object"
        return {
            key: self._get_type_structure(field_value, current_depth + 1)
            for key, field_value in input_data.items()
        }

    try:
        return python_to_json[type(input_data)]
    except KeyError:
        # The lookup is by exact type, so a type without an entry would
        # otherwise make building the log message itself raise. Report the
        # Python type name instead; this still reveals no record value.
        return type(input_data).__name__
65 changes: 55 additions & 10 deletions unit_tests/sources/utils/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@
@pytest.mark.parametrize(
"schema, actual, expected, expected_warns",
[
(SIMPLE_SCHEMA, {"value": 12}, {"value": "12"}, None),
(SIMPLE_SCHEMA, {"value": 12}, {"value": "12"}, None),
(
SIMPLE_SCHEMA,
Expand Down Expand Up @@ -104,14 +103,14 @@
COMPLEX_SCHEMA,
{"prop": 12, "number_prop": "aa12", "array": [12]},
{"prop": "12", "number_prop": "aa12", "array": ["12"]},
"Failed to transform value 'aa12' of type 'string' to 'number', key path: '.number_prop'",
"Failed to transform value from type 'string' to type 'number' at path: 'number_prop'",
),
# Field too_many_types has an ambiguous type, skip formatting
(
COMPLEX_SCHEMA,
{"prop": 12, "too_many_types": 1212, "array": [12]},
{"prop": "12", "too_many_types": 1212, "array": ["12"]},
"Failed to transform value 1212 of type 'integer' to '['boolean', 'null', 'string']', key path: '.too_many_types'",
"Failed to transform value from type 'integer' to type '['boolean', 'null', 'string']' at path: 'too_many_types'",
),
# Test null field
(COMPLEX_SCHEMA, {"prop": None, "array": [12]}, {"prop": "None", "array": ["12"]}, None),
Expand Down Expand Up @@ -196,7 +195,7 @@
},
{"value": "string"},
{"value": "string"},
"Failed to transform value 'string' of type 'string' to 'array', key path: '.value'",
"Failed to transform value from type 'string' to type 'array' at path: 'value'",
),
(
{
Expand All @@ -205,21 +204,21 @@
},
{"value": {"key": "value"}},
{"value": {"key": "value"}},
"Failed to transform value {'key': 'value'} of type 'object' to 'array', key path: '.value'",
"Failed to transform value from type '{'key': 'string'}' to type 'array' at path: 'value'",
),
(
# Schema root object is not an object, no conversion should happen
{"type": "integer"},
{"value": "12"},
{"value": "12"},
"Failed to transform value {'value': '12'} of type 'object' to 'integer', key path: '.'",
"Failed to transform value from type '{'value': 'string'}' to type 'integer' at path: ''",
),
(
# More than one type except null, no conversion should happen
{"type": "object", "properties": {"value": {"type": ["string", "boolean", "null"]}}},
{"value": 12},
{"value": 12},
"Failed to transform value 12 of type 'integer' to '['string', 'boolean', 'null']', key path: '.value'",
"Failed to transform value from type 'integer' to type '['string', 'boolean', 'null']' at path: 'value'",
),
(
# Oneof not supported, no conversion for one_of_value should happen
Expand Down Expand Up @@ -252,7 +251,7 @@
},
{"value": {"key": "value"}},
{"value": {"key": "value"}},
"Failed to transform value {'key': 'value'} of type 'object' to 'array', key path: '.value'",
"Failed to transform value from type '{'key': 'string'}' to type 'array' at path: 'value'",
),
(
{
Expand All @@ -263,7 +262,7 @@
},
{"value1": "value2"},
{"value1": "value2"},
"Failed to transform value 'value2' of type 'string' to 'object', key path: '.value1'",
"Failed to transform value from type 'string' to type 'object' at path: 'value1'",
),
(
{
Expand All @@ -272,9 +271,55 @@
},
{"value": ["one", "two"]},
{"value": ["one", "two"]},
"Failed to transform value 'one' of type 'string' to 'object', key path: '.value.0'",
"Failed to transform value from type 'string' to type 'object' at path: 'value.0'",
),
(
{"type": "string"},
None,
None,
"Failed to transform value from type 'null' to type 'string' at path: ''",
),
(
{"type": "string"},
{"a": {"b": {"c": {"d": {"e": "deep value"}}}}},
{"a": {"b": {"c": {"d": {"e": "deep value"}}}}},
"Failed to transform value from type '{'a': {'b': {'c': 'object'}}}' to type 'string' at path: ''",
),
],
ids=[
"simple_number_to_string",
"preserve_unexpected_fields",
"array_with_mixed_types",
"nested_list_conversion",
"array_in_nested_object",
"string_to_boolean_nested",
"empty_object",
"string_to_integer",
"skip_invalid_number_format",
"skip_ambiguous_types",
"null_to_string",
"preserve_null_when_allowed",
"very_nested_object_conversion",
"null_in_nested_structure",
"object_without_properties",
"array_without_items",
"non_array_to_array",
"number_to_array",
"null_to_array",
"null_preserved_for_nullable_array",
"number_to_string_array",
"string_fails_object_array",
"object_fails_array_with_string_array_items",
"non_object_root_schema",
"multiple_allowed_types",
"oneof_not_supported",
"facebook_cpc_number_conversion",
"object_fails_array_with_string_item",
"string_fails_object_conversion",
"string_fails_object_in_array",
"null_input_data",
"max_nesting_depth_protection",
],
)
def test_transform(schema, actual, expected, expected_warns, caplog):
t = TypeTransformer(TransformConfig.DefaultSchemaNormalization)
Expand Down

0 comments on commit b5ed82c

Please sign in to comment.