class TItem(TPayload):
    """Minimal payload used by the struct tests: an i32 id and a string list."""
    thrift_spec = {
        1: (TType.I32, "id", False),
        # The element spec is a bare type code; the parentheses the original
        # put around it did not make a tuple, so they are dropped.
        2: (TType.LIST, "phones", TType.STRING, False),
    }
    default_spec = [("id", None), ("phones", None)]


class TPkg(TPayload):
    """Payload nesting a list of TItem structs, used by the recursive tests."""
    thrift_spec = {
        1: (TType.I32, "id", False),
        2: (TType.LIST, "items", (TType.STRUCT, TItem), False),
    }
    default_spec = [("id", None), ("items", None)]


def gen_proto(bytearray=b''):
    """Return ``(buffer, protocol)`` with a TCompactProtocol over a BytesIO.

    ``bytearray`` holds the initial buffer content (it shadows the builtin;
    the name is kept so keyword callers keep working).
    """
    b = BytesIO(bytearray)
    proto = compact.TCompactProtocol(b)
    return (b, proto)


# --- scalar values --------------------------------------------------------
# Each pack test pins the exact wire bytes; the matching unpack test feeds
# the same bytes back and expects the original value.

def test_pack_byte():
    b, proto = gen_proto()
    proto.write_val(TType.BYTE, 77)
    assert "4d" == hexlify(b.getvalue())


def test_unpack_byte():
    b, proto = gen_proto(b'\x4d')
    assert 77 == proto.read_val(TType.BYTE)


def test_pack_i16():
    b, proto = gen_proto()
    proto.write_val(TType.I16, 12345)
    assert "f2 c0 01" == hexlify(b.getvalue())


def test_unpack_i16():
    b, proto = gen_proto(b"\xf2\xc0\x01")
    assert 12345 == proto.read_val(TType.I16)


def test_pack_i32():
    b, proto = gen_proto()
    proto.write_val(TType.I32, 1234567890)
    assert "a4 8b b0 99 09" == hexlify(b.getvalue())


def test_unpack_i32():
    b, proto = gen_proto(b"\xa4\x8b\xb0\x99\x09")
    assert 1234567890 == proto.read_val(TType.I32)


def test_pack_i64():
    b, proto = gen_proto()
    proto.write_val(TType.I64, 1234567890123456789)
    assert "aa 84 cc de 8f bd 88 a2 22" == hexlify(b.getvalue())


def test_unpack_i64():
    b, proto = gen_proto(b"\xaa\x84\xcc\xde\x8f\xbd\x88\xa2\x22")
    assert 1234567890123456789 == proto.read_val(TType.I64)


def test_pack_double():
    b, proto = gen_proto()
    proto.write_val(TType.DOUBLE, 1234567890.1234567890)
    assert "b7 e6 87 b4 80 65 d2 41" == hexlify(b.getvalue())


def test_unpack_double():
    b, proto = gen_proto(b"\xb7\xe6\x87\xb4\x80\x65\xd2\x41")
    assert 1234567890.1234567890 == proto.read_val(TType.DOUBLE)


def test_pack_string():
    """Strings go out as a length prefix followed by their UTF-8 bytes."""
    b, proto = gen_proto()
    proto.write_val(TType.STRING, "hello world!")
    assert "0c 68 65 6c 6c 6f 20 77 6f 72 6c 64 21" == \
        hexlify(b.getvalue())

    b1, proto1 = gen_proto()
    proto1.write_val(TType.STRING, "你好世界")
    assert "0c e4 bd a0 e5 a5 bd e4 b8 96 e7 95 8c" == \
        hexlify(b1.getvalue())


def test_unpack_string():
    b, proto = gen_proto(b"\x0c\x68\x65\x6c\x6c\x6f"
                         b"\x20\x77\x6f\x72\x6c\x64\x21")
    assert u('hello world!') == proto.read_val(TType.STRING)

    b, proto = gen_proto(b'\x0c\xe4\xbd\xa0\xe5\xa5'
                         b'\xbd\xe4\xb8\x96\xe7\x95\x8c')
    assert u('你好世界') == proto.read_val(TType.STRING)


def test_pack_bool():
    b, proto = gen_proto()
    proto.write_bool(True)
    assert "01" == hexlify(b.getvalue())


def test_unpack_bool():
    b, proto = gen_proto(b"\x01")
    assert proto.read_bool()


# --- containers -----------------------------------------------------------

def test_pack_container_bool():
    """Booleans inside containers are written as one byte each (01 / 02)."""
    b, proto = gen_proto()
    proto.write_val(TType.LIST, [True, False, True], TType.BOOL)
    assert "31 01 02 01" == hexlify(b.getvalue())

    b, proto = gen_proto()
    proto.write_val(TType.MAP, {"a": True}, (TType.STRING, TType.BOOL))
    assert "01 81 01 61 01" == hexlify(b.getvalue())

    b, proto = gen_proto()
    proto.write_val(TType.MAP, {"a": [True, False]},
                    (TType.STRING, (TType.LIST, TType.BOOL)))
    assert "01 89 01 61 21 01 02" == hexlify(b.getvalue())


def test_unpack_container_bool():
    b, proto = gen_proto(b"\x31\x01\x02\x01")
    assert [True, False, True] == proto.read_val(TType.LIST, TType.BOOL)

    b, proto = gen_proto(b"\x01\x81\x01\x61\x01")
    assert {u("a"): True} == proto.read_val(TType.MAP,
                                            (TType.STRING, TType.BOOL))

    b, proto = gen_proto(b"\x01\x89\x01\x61\x21\x01\x02")
    assert {u("a"): [True, False]} == proto.read_val(
        TType.MAP, (TType.STRING, (TType.LIST, TType.BOOL)))

    # Three entries, checked by identity so that a truthy non-bool value
    # cannot slip through.
    b, proto = gen_proto(b"\x03\x81\x01\x61\x01\x01\x63\x01\x01\x62\x02")
    bool_hash = proto.read_val(TType.MAP, (TType.STRING, TType.BOOL))
    assert bool_hash['a'] is True
    assert bool_hash['b'] is False
    assert bool_hash['c'] is True


def test_pack_list():
    b, proto = gen_proto()
    proto.write_val(TType.LIST, [1, 2, 3, 4, 5], TType.I16)
    assert "54 02 04 06 08 0a" == hexlify(b.getvalue())


def test_unpack_list():
    b, proto = gen_proto(b"\x54\x02\x04\x06\x08\x0a")
    assert [1, 2, 3, 4, 5] == proto.read_val(TType.LIST, TType.I16)


def test_pack_map():
    b, proto = gen_proto()
    proto.write_val(TType.MAP, {'a': 2}, (TType.STRING, TType.I16))
    assert "01 84 01 61 04" == hexlify(b.getvalue())


def test_unpack_map():
    b, proto = gen_proto(b"\x01\x84\x01\x61\x04")
    # Use the u() compat helper like every other unicode expectation in this
    # module instead of a raw u'' literal.
    assert {u("a"): 2} == proto.read_val(TType.MAP,
                                         (TType.STRING, TType.I16))


# --- message header -------------------------------------------------------

def test_write_message_begin():
    """Header bytes: 0x82 protocol id, 0x41 = (type 2 << 5) | version 1,
    then the seqid and the length-prefixed name "test"."""
    b, proto = gen_proto()
    proto.write_message_begin("test", 2, 1)
    assert "82 41 01 04 74 65 73 74" == \
        hexlify(b.getvalue())


def test_read_message_begin():
    b, proto = gen_proto(b"\x82\x41\x01\x04\x74\x65\x73\x74")
    res = proto.read_message_begin()
    assert res == ("test", 2, 1)


# --- structs --------------------------------------------------------------

def test_write_struct():
    b, proto = gen_proto()
    item = TItem(id=123, phones=["123456", "abcdef"])
    proto.write_struct(item)
    assert ("15 f6 01 19 28 06 31 32 33 34 "
            "35 36 06 61 62 63 64 65 66 00" == hexlify(b.getvalue()))


def test_write_struct2():
    """Same struct as test_write_struct, written through write_val."""
    b, proto = gen_proto()
    item = TItem(id=123, phones=["123456", "abcdef"])
    proto.write_val(TType.STRUCT, item)
    assert ("15 f6 01 19 28 06 31 32 33 34 "
            "35 36 06 61 62 63 64 65 66 00" == hexlify(b.getvalue()))


def test_read_struct():
    b, proto = gen_proto(b"\x15\xf6\x01\x19\x28\x06\x31\x32\x33\x34"
                         b"\x35\x36\x06\x61\x62\x63\x64\x65\x66\x00")
    _item = TItem(id=123, phones=["123456", "abcdef"])
    _item2 = TItem()
    proto.read_struct(_item2)
    assert _item == _item2


def test_write_struct_recur():
    """A struct holding a list of structs serialises the nested items inline."""
    b, proto = gen_proto()
    item1 = TItem(id=123, phones=["123456", "abcdef"])
    item2 = TItem(id=456, phones=["123456", "abcdef"])
    pkg = TPkg(id=123, items=[item1, item2])
    proto.write_val(TType.STRUCT, pkg)
    assert ("15 f6 01 19 2c 15 f6 01 19 28 06 31 32 33 34 35 36 06 61 62 63 "
            "64 65 66 00 15 90 07 19 28 06 31 32 33 34 35 36 06 61 62 63 64 "
            "65 66 00 00" == hexlify(b.getvalue()))


def test_read_struct_recur():
    b, proto = gen_proto(b'\x15\xf6\x01\x19,\x15\xf6\x01\x19(\x06123456\x06'
                         b'abcdef\x00\x15\x90\x07\x19(\x06123456\x06abcdef'
                         b'\x00\x00')
    pkg = TPkg()
    proto.read_struct(pkg)
    item1 = TItem(id=123, phones=["123456", "abcdef"])
    item2 = TItem(id=456, phones=["123456", "abcdef"])
    _pkg = TPkg(id=123, items=[item1, item2])
    assert _pkg == pkg


def test_write_empty_struct():
    """A struct with no fields set is written as the single stop byte."""
    b, proto = gen_proto()
    item = TItem()
    proto.write_struct(item)
    assert "00" == hexlify(b.getvalue())


def test_read_empty_struct():
    b, proto = gen_proto(b"\x00")
    _item = TItem()
    _item2 = TItem()
    proto.read_struct(_item2)
    assert _item == _item2


def test_write_huge_struct():
    """Smoke test: writing a 100000-element list must simply not raise."""
    b, proto = gen_proto()
    item = TItem(id=12345, phones=["1234567890"] * 100000)
    proto.write_struct(item)
# Protocol state markers.
# NOTE(review): presumably reader/writer state-machine states; nothing in the
# code visible alongside these constants references them -- confirm usage.
CLEAR = 0
FIELD_WRITE = 1
VALUE_WRITE = 2
CONTAINER_WRITE = 3
BOOL_WRITE = 4
FIELD_READ = 5
CONTAINER_READ = 6
VALUE_READ = 7
BOOL_READ = 8

# Bit widths that get range-checked, mapped to the name used in the message.
_INT_TYPE_NAMES = {8: "i8", 16: "i16", 32: "i32", 64: "i64"}


def check_integer_limits(i, bits):
    """Raise if ``i`` does not fit a signed integer of ``bits`` bits.

    Only widths 8, 16, 32 and 64 are checked; any other ``bits`` value is
    accepted silently, as before.

    :raises TProtocolException: (INVALID_DATA) when ``i`` is out of range.
    """
    name = _INT_TYPE_NAMES.get(bits)
    if name is None:
        return
    high = (1 << (bits - 1)) - 1
    low = -high - 1
    if i < low or i > high:
        # Built from the computed bounds so every width gets a clean message
        # (the old i64 text had a line continuation and indentation spaces
        # embedded in the string).
        raise TProtocolException(
            TProtocolException.INVALID_DATA,
            "%s requires %d <= number <= %d" % (name, low, high))


def make_zig_zag(n, bits):
    """Zigzag-encode the signed ``bits``-wide integer ``n``.

    Maps 0, -1, 1, -2, ... to 0, 1, 2, 3, ... so small magnitudes become
    small unsigned numbers.

    :raises TProtocolException: when ``n`` is out of range for ``bits``.
    """
    check_integer_limits(n, bits)
    return (n << 1) ^ (n >> (bits - 1))


def from_zig_zag(n):
    """Decode a zigzag-encoded non-negative integer back to a signed one."""
    return (n >> 1) ^ -(n & 1)


def write_varint(trans, n):
    """Write the non-negative integer ``n`` to ``trans`` as a base-128 varint.

    Seven payload bits are emitted per byte, least-significant group first;
    the high bit of a byte is set when more bytes follow.

    :param trans: object with a ``write(bytes)`` method.
    :raises TProtocolException: (INVALID_DATA) when ``n`` is negative.
    """
    if n < 0:
        # Right-shifting a negative Python int never reaches zero, so the
        # loop below would never terminate.
        raise TProtocolException(
            TProtocolException.INVALID_DATA,
            "varint requires a non-negative number, got %d" % n)

    out = bytearray()
    while n & ~0x7f:
        out.append((n & 0x7f) | 0x80)
        n >>= 7
    out.append(n)
    # bytes(bytearray) works on both Python 2 and 3; array.tostring() was
    # removed in Python 3.9.
    trans.write(bytes(out))


def read_varint(trans):
    """Read one base-128 varint from ``trans`` and return it as an int.

    :param trans: object with a ``read(n)`` method returning bytes.
    :raises EOFError: when the stream ends before the varint is complete.
    """
    result = 0
    shift = 0

    while True:
        x = trans.read(1)
        if not x:
            # An empty read used to surface as an opaque TypeError from ord().
            raise EOFError("unexpected end of data while reading varint")
        byte = ord(x)
        result |= (byte & 0x7f) << shift
        if byte >> 7 == 0:
            return result
        shift += 7


class CompactType(object):
    """Type codes used on the wire by the compact protocol."""
    STOP = 0x00
    # Booleans have two codes, one per value.
    TRUE = 0x01
    FALSE = 0x02
    BYTE = 0x03
    I16 = 0x04
    I32 = 0x05
    I64 = 0x06
    DOUBLE = 0x07
    BINARY = 0x08
    LIST = 0x09
    SET = 0x0A
    MAP = 0x0B
    STRUCT = 0x0C
self.read_ubyte() + if type & 0x0f == TType.STOP: + return (None, 0, 0) + delta = type >> 4 + if delta == 0: + fid = from_zig_zag(read_varint(self.trans)) + else: + fid = self.__last_fid + delta + self.__last_fid = fid + type = type & 0x0f + if type == CompactType.TRUE: + self.__bool_value = True + elif type == CompactType.FALSE: + self.__bool_value = False + else: + pass + return (None, self.__getTType(type), fid) + + def read_field_end(self): + pass + + def read_struct_begin(self): + self.__structs.append(self.__last_fid) + self.__last_fid = 0 + + def read_struct_end(self): + self.__last_fid = self.__structs.pop() + + def read_map_begin(self): + size = self.__read_size() + types = 0 + if size > 0: + types = self.read_ubyte() + vtype = self.__getTType(types) + ktype = self.__getTType(types >> 4) + return (ktype, vtype, size) + + def read_collection_begin(self): + size_type = self.read_ubyte() + size = size_type >> 4 + type = self.__getTType(size_type) + if size == 15: + size = self.__read_size() + return type, size + + def read_collection_end(self): + pass + + def read_byte(self): + result, = unpack('!b', self.trans.read(1)) + return result + + def read_ubyte(self): + result, = unpack('!B', self.trans.read(1)) + return result + + def read_int(self): + return from_zig_zag(read_varint(self.trans)) + + def read_double(self): + buff = self.trans.read(8) + val, = unpack(' self.__last_fid \ + and self.__bool_fid - self.__last_fid <= 15: + if bool: + ctype = CompactType.TRUE + else: + ctype = CompactType.FALSE + self.__write_field_header(ctype, self.__bool_fid) + else: + if bool: + self.write_byte(CompactType.TRUE) + else: + self.write_byte(CompactType.FALSE) + + def write_i16(self, i16): + write_varint(self.trans, make_zig_zag(i16, 16)) + + def write_i32(self, i32): + write_varint(self.trans, make_zig_zag(i32, 32)) + + def write_i64(self, i64): + write_varint(self.trans, make_zig_zag(i64, 64)) + + def write_double(self, dub): + self.trans.write(pack('