-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Update Protocol Tests #3227
base: develop
Are you sure you want to change the base?
Update Protocol Tests #3227
Changes from all commits
45ba138
4714a35
9a94f30
0559d2e
772fd81
6d55023
7db1cf3
752465c
35aeaf6
6afd162
2ec3a18
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -124,6 +124,7 @@ | |
from botocore.compat import ETree, XMLParseError | ||
from botocore.eventstream import EventStream, NoInitialResponseError | ||
from botocore.utils import ( | ||
ensure_boolean, | ||
is_json_value_header, | ||
lowercase_dict, | ||
merge_dicts, | ||
|
@@ -201,6 +202,10 @@ class ResponseParser: | |
|
||
DEFAULT_ENCODING = 'utf-8' | ||
EVENT_STREAM_PARSER_CLS = None | ||
# This is a list of known values for the "location" key in the | ||
# serialization dict. The location key tells us where in the response | ||
# to parse the value. | ||
KNOWN_LOCATIONS = ('statusCode', 'header', 'headers') | ||
|
||
def __init__(self, timestamp_parser=None, blob_parser=None): | ||
if timestamp_parser is None: | ||
|
@@ -356,14 +361,21 @@ def _has_unknown_tagged_union_member(self, shape, value): | |
if shape.is_tagged_union: | ||
cleaned_value = value.copy() | ||
cleaned_value.pop("__type", None) | ||
cleaned_value = { | ||
k: v for k, v in cleaned_value.items() if v is not None | ||
} | ||
if len(cleaned_value) != 1: | ||
error_msg = ( | ||
"Invalid service response: %s must have one and only " | ||
"one member set." | ||
) | ||
raise ResponseParserError(error_msg % shape.name) | ||
tag = self._get_first_key(cleaned_value) | ||
if tag not in shape.members: | ||
serialized_member_names = [ | ||
shape.members[member].serialization.get('name', member) | ||
for member in shape.members | ||
] | ||
if tag not in serialized_member_names: | ||
msg = ( | ||
"Received a tagged union response with member " | ||
"unknown to client: %s. Please upgrade SDK for full " | ||
|
@@ -427,11 +439,12 @@ def _handle_structure(self, shape, node): | |
return self._handle_unknown_tagged_union_member(tag) | ||
for member_name in members: | ||
member_shape = members[member_name] | ||
location = member_shape.serialization.get('location') | ||
if ( | ||
'location' in member_shape.serialization | ||
location in self.KNOWN_LOCATIONS | ||
or member_shape.serialization.get('eventheader') | ||
): | ||
# All members with locations have already been handled, | ||
# All members with known locations have already been handled, | ||
# so we don't need to parse these members. | ||
continue | ||
xml_name = self._member_key_name(member_shape, member_name) | ||
|
@@ -460,20 +473,7 @@ def _get_error_root(self, original_root): | |
return original_root | ||
|
||
def _member_key_name(self, shape, member_name): | ||
# This method is needed because we have to special case flattened list | ||
# with a serialization name. If this is the case we use the | ||
# locationName from the list's member shape as the key name for the | ||
# surrounding structure. | ||
if shape.type_name == 'list' and shape.serialization.get('flattened'): | ||
list_member_serialized_name = shape.member.serialization.get( | ||
'name' | ||
) | ||
if list_member_serialized_name is not None: | ||
return list_member_serialized_name | ||
serialized_name = shape.serialization.get('name') | ||
if serialized_name is not None: | ||
return serialized_name | ||
return member_name | ||
return shape.serialization.get('name', member_name) | ||
|
||
def _build_name_to_xml_node(self, parent_node): | ||
# If the parent node is actually a list. We should not be trying | ||
|
@@ -554,6 +554,8 @@ def _handle_blob(self, shape, text): | |
|
||
|
||
class QueryParser(BaseXMLResponseParser): | ||
ROOT_NODE_SUFFIX = 'Result' | ||
|
||
def _do_error_parse(self, response, shape): | ||
xml_contents = response['body'] | ||
root = self._parse_xml_string_to_dom(xml_contents) | ||
|
@@ -586,14 +588,23 @@ def _parse_body_as_xml(self, response, shape, inject_metadata=True): | |
start = self._find_result_wrapped_shape( | ||
shape.serialization['resultWrapper'], root | ||
) | ||
else: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is another code path I find a little surprising. Why are we trying to guess about shapes in the model? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This logic was added in effort to align with the following serialization guidance: Query:
EC2:
Note: The next revision doesn't hardcode |
||
operation_name = response.get("context", {}).get( | ||
"operation_name", "" | ||
) | ||
inferred_wrapper = self._find_result_wrapped_shape( | ||
f"{operation_name}{self.ROOT_NODE_SUFFIX}", root | ||
) | ||
if inferred_wrapper is not None: | ||
start = inferred_wrapper | ||
parsed = self._parse_shape(shape, start) | ||
if inject_metadata: | ||
self._inject_response_metadata(root, parsed) | ||
return parsed | ||
|
||
def _find_result_wrapped_shape(self, element_name, xml_root_node): | ||
mapping = self._build_name_to_xml_node(xml_root_node) | ||
return mapping[element_name] | ||
return mapping.get(element_name) | ||
|
||
def _inject_response_metadata(self, node, inject_into): | ||
mapping = self._build_name_to_xml_node(node) | ||
|
@@ -606,6 +617,8 @@ def _inject_response_metadata(self, node, inject_into): | |
|
||
|
||
class EC2QueryParser(QueryParser): | ||
ROOT_NODE_SUFFIX = 'Response' | ||
|
||
def _inject_response_metadata(self, node, inject_into): | ||
mapping = self._build_name_to_xml_node(node) | ||
child_node = mapping.get('requestId') | ||
|
@@ -704,11 +717,19 @@ def _do_error_parse(self, response, shape): | |
|
||
code = body.get('__type', response_code and str(response_code)) | ||
if code is not None: | ||
# The "Code" value can come from either a response | ||
# header or a value in the JSON body. | ||
if 'x-amzn-errortype' in response['headers']: | ||
code = response['headers']['x-amzn-errortype'] | ||
elif 'code' in body or 'Code' in body: | ||
code = body.get('code', body.get('Code', '')) | ||
Comment on lines
718
to
+725
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I believe this block was originally for JSON RPC error-type handling. Are these changes all JSON RPC specific? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The protocol tests require us to support following requirement for operation error parsing:
One update I will have to make is the preference of |
||
# code has a couple forms as well: | ||
# * "com.aws.dynamodb.vAPI#ProvisionedThroughputExceededException" | ||
# * "ResourceNotFoundException" | ||
if ':' in code: | ||
code = code.split(':', 1)[0] | ||
if '#' in code: | ||
code = code.rsplit('#', 1)[1] | ||
code = code.split('#', 1)[1] | ||
if 'x-amzn-query-error' in headers: | ||
code = self._do_query_compatible_error_parse( | ||
code, headers, error | ||
|
@@ -994,14 +1015,28 @@ def _handle_string(self, shape, value): | |
parsed = value | ||
if is_json_value_header(shape): | ||
decoded = base64.b64decode(value).decode(self.DEFAULT_ENCODING) | ||
parsed = json.loads(decoded) | ||
parsed = json.dumps(json.loads(decoded)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we changing the type returned here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe I misunderstood this, but I assumed if this method is handling a string value, we should be returning the string representation of the parsed json object. |
||
return parsed | ||
|
||
def _handle_list_header(self, node): | ||
# TODO: Clean up and consider timestamps. | ||
TOKEN_PATTERN = r'[!#$%&\'*+\-.^_`|~\w]+' | ||
QUOTED_STRING_PATTERN = r'"(?:[^"\\]|\\.)*"' | ||
PATTERN = rf'({QUOTED_STRING_PATTERN}|{TOKEN_PATTERN})' | ||
Comment on lines
+1023
to
+1025
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Regex strings should be compiled ideally and we should do it once outside of the parsing function. |
||
matches = re.findall(PATTERN, node) | ||
parsed_values = [] | ||
for match in matches: | ||
if match.startswith('"') and match.endswith('"'): | ||
parsed_values.append(match[1:-1].replace('\\"', '"')) | ||
Comment on lines
+1029
to
+1030
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we not use a group to avoid modifying the result? |
||
else: | ||
parsed_values.append(match) | ||
return parsed_values | ||
|
||
def _handle_list(self, shape, node): | ||
location = shape.serialization.get('location') | ||
if location == 'header' and not isinstance(node, list): | ||
# List in headers may be a comma separated string as per RFC7230 | ||
node = [e.strip() for e in node.split(',')] | ||
node = self._handle_list_header(node) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks like the token pattern includes whitespace? How are we preserving value normalization? |
||
return super()._handle_list(shape, node) | ||
|
||
|
||
|
@@ -1011,28 +1046,17 @@ class RestJSONParser(BaseRestParser, BaseJSONParser): | |
def _initial_body_parse(self, body_contents): | ||
return self._parse_body_as_json(body_contents) | ||
|
||
def _do_error_parse(self, response, shape): | ||
error = super()._do_error_parse(response, shape) | ||
self._inject_error_code(error, response) | ||
return error | ||
|
||
def _inject_error_code(self, error, response): | ||
# The "Code" value can come from either a response | ||
# header or a value in the JSON body. | ||
body = self._initial_body_parse(response['body']) | ||
if 'x-amzn-errortype' in response['headers']: | ||
code = response['headers']['x-amzn-errortype'] | ||
# Could be: | ||
# x-amzn-errortype: ValidationException: | ||
code = code.split(':')[0] | ||
error['Error']['Code'] = code | ||
elif 'code' in body or 'Code' in body: | ||
error['Error']['Code'] = body.get('code', body.get('Code', '')) | ||
Comment on lines
-1014
to
-1030
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So this goes back to my comment above. This looks like it was specific to |
||
def _handle_boolean(self, shape, value): | ||
return ensure_boolean(value) | ||
|
||
def _handle_integer(self, shape, value): | ||
return int(value) | ||
|
||
def _handle_float(self, shape, value): | ||
return float(value) | ||
|
||
_handle_long = _handle_integer | ||
_handle_double = _handle_float | ||
|
||
|
||
class RestXMLParser(BaseRestParser, BaseXMLResponseParser): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why did we hoist all of this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
From my understanding, the conditional logic we have defined here goes against the guidance provided in the Flattened list serialization guide.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm I look another look at this and did find some outliers.
I wrote a script to parse all service model files to look for list shapes with
"flattened":true
and alocationName
. The results are posted below:These services don't follow the expected behavior indicated by the Smithy guidance. Looking more into this.