Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Protocol Tests #3227

Draft
wants to merge 11 commits into
base: develop
Choose a base branch
from
100 changes: 62 additions & 38 deletions botocore/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
from botocore.compat import ETree, XMLParseError
from botocore.eventstream import EventStream, NoInitialResponseError
from botocore.utils import (
ensure_boolean,
is_json_value_header,
lowercase_dict,
merge_dicts,
Expand Down Expand Up @@ -201,6 +202,10 @@ class ResponseParser:

DEFAULT_ENCODING = 'utf-8'
EVENT_STREAM_PARSER_CLS = None
# This is a list of known values for the "location" key in the
# serialization dict. The location key tells us where in the response
# to parse the value.
KNOWN_LOCATIONS = ('statusCode', 'header', 'headers')

def __init__(self, timestamp_parser=None, blob_parser=None):
if timestamp_parser is None:
Expand Down Expand Up @@ -356,14 +361,21 @@ def _has_unknown_tagged_union_member(self, shape, value):
if shape.is_tagged_union:
cleaned_value = value.copy()
cleaned_value.pop("__type", None)
cleaned_value = {
k: v for k, v in cleaned_value.items() if v is not None
}
if len(cleaned_value) != 1:
error_msg = (
"Invalid service response: %s must have one and only "
"one member set."
)
raise ResponseParserError(error_msg % shape.name)
tag = self._get_first_key(cleaned_value)
if tag not in shape.members:
serialized_member_names = [
shape.members[member].serialization.get('name', member)
for member in shape.members
]
if tag not in serialized_member_names:
msg = (
"Received a tagged union response with member "
"unknown to client: %s. Please upgrade SDK for full "
Expand Down Expand Up @@ -427,11 +439,12 @@ def _handle_structure(self, shape, node):
return self._handle_unknown_tagged_union_member(tag)
for member_name in members:
member_shape = members[member_name]
location = member_shape.serialization.get('location')
if (
'location' in member_shape.serialization
location in self.KNOWN_LOCATIONS
or member_shape.serialization.get('eventheader')
):
# All members with locations have already been handled,
# All members with known locations have already been handled,
# so we don't need to parse these members.
continue
xml_name = self._member_key_name(member_shape, member_name)
Expand Down Expand Up @@ -460,20 +473,7 @@ def _get_error_root(self, original_root):
return original_root

def _member_key_name(self, shape, member_name):
# This method is needed because we have to special case flattened list
# with a serialization name. If this is the case we use the
# locationName from the list's member shape as the key name for the
# surrounding structure.
if shape.type_name == 'list' and shape.serialization.get('flattened'):
list_member_serialized_name = shape.member.serialization.get(
'name'
)
if list_member_serialized_name is not None:
return list_member_serialized_name
serialized_name = shape.serialization.get('name')
if serialized_name is not None:
return serialized_name
return member_name
Comment on lines -463 to -476
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did we hoist all of this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From my understanding, the conditional logic we have defined here goes against the guidance provided in the Flattened list serialization guide.

The xmlName trait applied to the member of a list has no effect when serializing a flattened list into a structure/union. For example, given the following:

union Choice {
    @xmlFlattened
    flat: MyList
}

list MyList {
    @xmlName("Hi")
    member: String
}

The XML serialization of Choice is:

 <Choice>
     <flat>example1</flat>
     <flat>example2</flat>
     <flat>example3</flat>
 </Choice>

Copy link
Contributor Author

@jonathan343 jonathan343 Aug 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm I look another look at this and did find some outliers.
I wrote a script to parse all service model files to look for list shapes with "flattened":true and a locationName. The results are posted below:

Detecting flattened list shapes with a locationName:
s3control:
 * StorageLensConfigurationList - StorageLensConfiguration
 * StorageLensGroupList - StorageLensGroup
sdb:
 * AttributeList - Attribute
 * AttributeNameList - AttributeName
 * DeletableItemList - Item
 * DomainNameList - DomainName
 * ItemList - Item
 * ReplaceableAttributeList - Attribute
 * ReplaceableItemList - Item 

These services don't follow the expected behavior indicated by the Smithy guidance. Looking more into this.

return shape.serialization.get('name', member_name)

def _build_name_to_xml_node(self, parent_node):
# If the parent node is actually a list. We should not be trying
Expand Down Expand Up @@ -554,6 +554,8 @@ def _handle_blob(self, shape, text):


class QueryParser(BaseXMLResponseParser):
ROOT_NODE_SUFFIX = 'Result'

def _do_error_parse(self, response, shape):
xml_contents = response['body']
root = self._parse_xml_string_to_dom(xml_contents)
Expand Down Expand Up @@ -586,14 +588,23 @@ def _parse_body_as_xml(self, response, shape, inject_metadata=True):
start = self._find_result_wrapped_shape(
shape.serialization['resultWrapper'], root
)
else:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is another code path I find a little surprising. Why are we trying to guess about shapes in the model?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This logic was added in effort to align with the following serialization guidance:

Query:

The awsQuery protocol serializes XML responses within an XML root node with the name of the operation's output suffixed with "Response". A nested element, with the name of the operation's output suffixed with "Result", contains the contents of the successful response.
ref: https://smithy.io/2.0/aws/protocols/aws-query-protocol.html#response-serialization

EC2:

The ec2Query protocol serializes XML responses within an XML root node with the name of the operation's output suffixed with "Response", which contains the contents of the successful response.
ref: https://smithy.io/2.0/aws/protocols/aws-ec2-query-protocol.html#response-serialization

Note: The next revision doesn't hardcode "Result" and instead uses a ROOT_NODE_SUFFIX constant that is defined as ROOT_NODE_SUFFIX = 'Result' for QueryParser and ROOT_NODE_SUFFIX = 'Response' for EC2QueryParser.

operation_name = response.get("context", {}).get(
"operation_name", ""
)
inferred_wrapper = self._find_result_wrapped_shape(
f"{operation_name}{self.ROOT_NODE_SUFFIX}", root
)
if inferred_wrapper is not None:
start = inferred_wrapper
parsed = self._parse_shape(shape, start)
if inject_metadata:
self._inject_response_metadata(root, parsed)
return parsed

def _find_result_wrapped_shape(self, element_name, xml_root_node):
mapping = self._build_name_to_xml_node(xml_root_node)
return mapping[element_name]
return mapping.get(element_name)

def _inject_response_metadata(self, node, inject_into):
mapping = self._build_name_to_xml_node(node)
Expand All @@ -606,6 +617,8 @@ def _inject_response_metadata(self, node, inject_into):


class EC2QueryParser(QueryParser):
ROOT_NODE_SUFFIX = 'Response'

def _inject_response_metadata(self, node, inject_into):
mapping = self._build_name_to_xml_node(node)
child_node = mapping.get('requestId')
Expand Down Expand Up @@ -704,11 +717,19 @@ def _do_error_parse(self, response, shape):

code = body.get('__type', response_code and str(response_code))
if code is not None:
# The "Code" value can come from either a response
# header or a value in the JSON body.
if 'x-amzn-errortype' in response['headers']:
code = response['headers']['x-amzn-errortype']
elif 'code' in body or 'Code' in body:
code = body.get('code', body.get('Code', ''))
Comment on lines 718 to +725
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this block was originally for JSON RPC error-type handling. Are these changes all JSON RPC specific?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The protocol tests require us to support following requirement for operation error parsing:

The component MUST be one of the following: an additional header with the name X-Amzn-Errortype, a body field with the name code, or a body field named __type.

One update I will have to make is the preference of __type for JSON-1.1/1.0 and x-amzn-errortype for REST-JSON

# code has a couple forms as well:
# * "com.aws.dynamodb.vAPI#ProvisionedThroughputExceededException"
# * "ResourceNotFoundException"
if ':' in code:
code = code.split(':', 1)[0]
if '#' in code:
code = code.rsplit('#', 1)[1]
code = code.split('#', 1)[1]
if 'x-amzn-query-error' in headers:
code = self._do_query_compatible_error_parse(
code, headers, error
Expand Down Expand Up @@ -994,14 +1015,28 @@ def _handle_string(self, shape, value):
parsed = value
if is_json_value_header(shape):
decoded = base64.b64decode(value).decode(self.DEFAULT_ENCODING)
parsed = json.loads(decoded)
parsed = json.dumps(json.loads(decoded))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we changing the type returned here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I misunderstood this, but I assumed if this method is handling a string value, we should be returning the string representation of the parsed json object.

return parsed

def _handle_list_header(self, node):
# TODO: Clean up and consider timestamps.
TOKEN_PATTERN = r'[!#$%&\'*+\-.^_`|~\w]+'
QUOTED_STRING_PATTERN = r'"(?:[^"\\]|\\.)*"'
PATTERN = rf'({QUOTED_STRING_PATTERN}|{TOKEN_PATTERN})'
Comment on lines +1023 to +1025
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regex strings should be compiled ideally and we should do it once outside of the parsing function.

matches = re.findall(PATTERN, node)
parsed_values = []
for match in matches:
if match.startswith('"') and match.endswith('"'):
parsed_values.append(match[1:-1].replace('\\"', '"'))
Comment on lines +1029 to +1030
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we not use a group to avoid modifying the result?

else:
parsed_values.append(match)
return parsed_values

def _handle_list(self, shape, node):
location = shape.serialization.get('location')
if location == 'header' and not isinstance(node, list):
# List in headers may be a comma separated string as per RFC7230
node = [e.strip() for e in node.split(',')]
node = self._handle_list_header(node)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the token pattern includes whitespace? How are we preserving value normalization?

return super()._handle_list(shape, node)


Expand All @@ -1011,28 +1046,17 @@ class RestJSONParser(BaseRestParser, BaseJSONParser):
def _initial_body_parse(self, body_contents):
return self._parse_body_as_json(body_contents)

def _do_error_parse(self, response, shape):
error = super()._do_error_parse(response, shape)
self._inject_error_code(error, response)
return error

def _inject_error_code(self, error, response):
# The "Code" value can come from either a response
# header or a value in the JSON body.
body = self._initial_body_parse(response['body'])
if 'x-amzn-errortype' in response['headers']:
code = response['headers']['x-amzn-errortype']
# Could be:
# x-amzn-errortype: ValidationException:
code = code.split(':')[0]
error['Error']['Code'] = code
elif 'code' in body or 'Code' in body:
error['Error']['Code'] = body.get('code', body.get('Code', ''))
Comment on lines -1014 to -1030
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this goes back to my comment above. This looks like it was specific to rest-json but we've moved it to the BaseJSONParser which will impact the JSON RPC protocol? Are we positive that's the desired outcome?

def _handle_boolean(self, shape, value):
return ensure_boolean(value)

def _handle_integer(self, shape, value):
return int(value)

def _handle_float(self, shape, value):
return float(value)

_handle_long = _handle_integer
_handle_double = _handle_float


class RestXMLParser(BaseRestParser, BaseXMLResponseParser):
Expand Down
Loading
Loading