diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc index ca391b4354c07..ea044f321adac 100644 --- a/cpp/src/arrow/dataset/file_parquet.cc +++ b/cpp/src/arrow/dataset/file_parquet.cc @@ -133,6 +133,8 @@ parquet::ArrowReaderProperties MakeArrowReaderProperties( arrow_properties.set_io_context( parquet_scan_options.arrow_reader_properties->io_context()); arrow_properties.set_use_threads(options.use_threads); + arrow_properties.set_arrow_extensions_enabled( + parquet_scan_options.arrow_reader_properties->get_arrow_extensions_enabled()); return arrow_properties; } diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp index 1ba0c4626233f..3a8708e60c6f1 100644 --- a/cpp/src/generated/parquet_types.cpp +++ b/cpp/src/generated/parquet_types.cpp @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.19.0) + * Autogenerated by Thrift Compiler (0.21.0) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -73,17 +73,17 @@ int _kConvertedTypeValues[] = { */ ConvertedType::LIST, /** - * an enum is converted into a binary field + * an enum is converted into a BYTE_ARRAY field */ ConvertedType::ENUM, /** * A decimal value. - * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's + * + * This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive + * types. The underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). The value of the decimal is the value * 10^{-scale}. - * + * * This must be accompanied by a (maximum) precision and a scale in the * SchemaElement. The precision specifies the number of digits in the decimal * and the scale stores the location of the decimal point. 
For example 1.23 @@ -93,47 +93,47 @@ int _kConvertedTypeValues[] = { ConvertedType::DECIMAL, /** * A Date - * + * * Stored as days since Unix epoch, encoded as the INT32 physical type. - * + * */ ConvertedType::DATE, /** * A time - * + * * The total number of milliseconds since midnight. The value is stored * as an INT32 physical type. */ ConvertedType::TIME_MILLIS, /** * A time. - * + * * The total number of microseconds since midnight. The value is stored as * an INT64 physical type. */ ConvertedType::TIME_MICROS, /** * A date/time combination - * + * * Date and time recorded as milliseconds since the Unix epoch. Recorded as * a physical type of INT64. */ ConvertedType::TIMESTAMP_MILLIS, /** * A date/time combination - * + * * Date and time recorded as microseconds since the Unix epoch. The value is * stored as an INT64 physical type. */ ConvertedType::TIMESTAMP_MICROS, /** * An unsigned integer value. - * + * * The number describes the maximum number of meaningful data bits in * the stored value. 8, 16 and 32 bit values are stored using the * INT32 physical type. 64 bit values are stored using the INT64 * physical type. - * + * */ ConvertedType::UINT_8, ConvertedType::UINT_16, @@ -141,12 +141,12 @@ int _kConvertedTypeValues[] = { ConvertedType::UINT_64, /** * A signed integer value. - * + * * The number describes the maximum number of meaningful data bits in * the stored value. 8, 16 and 32 bit values are stored using the * INT32 physical type. 64 bit values are stored using the INT64 * physical type. - * + * */ ConvertedType::INT_8, ConvertedType::INT_16, @@ -154,19 +154,19 @@ int _kConvertedTypeValues[] = { ConvertedType::INT_64, /** * An embedded JSON document - * + * * A JSON document embedded within a single UTF8 column. */ ConvertedType::JSON, /** * An embedded BSON document - * - * A BSON document embedded within a single BINARY column. + * + * A BSON document embedded within a single BYTE_ARRAY column. 
*/ ConvertedType::BSON, /** * An interval of time - * + * * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 * This data is composed of three separate little endian unsigned * integers. Each stores a component of a duration of time. The first @@ -197,17 +197,17 @@ const char* _kConvertedTypeNames[] = { */ "LIST", /** - * an enum is converted into a binary field + * an enum is converted into a BYTE_ARRAY field */ "ENUM", /** * A decimal value. - * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's + * + * This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive + * types. The underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). The value of the decimal is the value * 10^{-scale}. - * + * * This must be accompanied by a (maximum) precision and a scale in the * SchemaElement. The precision specifies the number of digits in the decimal * and the scale stores the location of the decimal point. For example 1.23 @@ -217,47 +217,47 @@ const char* _kConvertedTypeNames[] = { "DECIMAL", /** * A Date - * + * * Stored as days since Unix epoch, encoded as the INT32 physical type. - * + * */ "DATE", /** * A time - * + * * The total number of milliseconds since midnight. The value is stored * as an INT32 physical type. */ "TIME_MILLIS", /** * A time. - * + * * The total number of microseconds since midnight. The value is stored as * an INT64 physical type. */ "TIME_MICROS", /** * A date/time combination - * + * * Date and time recorded as milliseconds since the Unix epoch. Recorded as * a physical type of INT64. */ "TIMESTAMP_MILLIS", /** * A date/time combination - * + * * Date and time recorded as microseconds since the Unix epoch. The value is * stored as an INT64 physical type. */ "TIMESTAMP_MICROS", /** * An unsigned integer value. 
- * + * * The number describes the maximum number of meaningful data bits in * the stored value. 8, 16 and 32 bit values are stored using the * INT32 physical type. 64 bit values are stored using the INT64 * physical type. - * + * */ "UINT_8", "UINT_16", @@ -265,12 +265,12 @@ const char* _kConvertedTypeNames[] = { "UINT_64", /** * A signed integer value. - * + * * The number describes the maximum number of meaningful data bits in * the stored value. 8, 16 and 32 bit values are stored using the * INT32 physical type. 64 bit values are stored using the INT64 * physical type. - * + * */ "INT_8", "INT_16", @@ -278,19 +278,19 @@ const char* _kConvertedTypeNames[] = { "INT_64", /** * An embedded JSON document - * + * * A JSON document embedded within a single UTF8 column. */ "JSON", /** * An embedded BSON document - * - * A BSON document embedded within a single BINARY column. + * + * A BSON document embedded within a single BYTE_ARRAY column. */ "BSON", /** * An interval of time - * + * * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 * This data is composed of three separate little endian unsigned * integers. Each stores a component of a duration of time. The first @@ -325,11 +325,11 @@ std::string to_string(const ConvertedType::type& val) { int _kFieldRepetitionTypeValues[] = { /** - * This field is required (can not be null) and each record has exactly 1 value. + * This field is required (can not be null) and each row has exactly 1 value. */ FieldRepetitionType::REQUIRED, /** - * The field is optional (can be null) and each record has 0 or 1 values. + * The field is optional (can be null) and each row has 0 or 1 values. */ FieldRepetitionType::OPTIONAL, /** @@ -339,11 +339,11 @@ int _kFieldRepetitionTypeValues[] = { }; const char* _kFieldRepetitionTypeNames[] = { /** - * This field is required (can not be null) and each record has exactly 1 value. + * This field is required (can not be null) and each row has exactly 1 value. 
*/ "REQUIRED", /** - * The field is optional (can be null) and each record has 0 or 1 values. + * The field is optional (can be null) and each row has 0 or 1 values. */ "OPTIONAL", /** @@ -372,6 +372,41 @@ std::string to_string(const FieldRepetitionType::type& val) { } } +int _kEdgeInterpolationAlgorithmValues[] = { + EdgeInterpolationAlgorithm::SPHERICAL, + EdgeInterpolationAlgorithm::VINCENTY, + EdgeInterpolationAlgorithm::THOMAS, + EdgeInterpolationAlgorithm::ANDOYER, + EdgeInterpolationAlgorithm::KARNEY +}; +const char* _kEdgeInterpolationAlgorithmNames[] = { + "SPHERICAL", + "VINCENTY", + "THOMAS", + "ANDOYER", + "KARNEY" +}; +const std::map _EdgeInterpolationAlgorithm_VALUES_TO_NAMES(::apache::thrift::TEnumIterator(5, _kEdgeInterpolationAlgorithmValues, _kEdgeInterpolationAlgorithmNames), ::apache::thrift::TEnumIterator(-1, nullptr, nullptr)); + +std::ostream& operator<<(std::ostream& out, const EdgeInterpolationAlgorithm::type& val) { + std::map::const_iterator it = _EdgeInterpolationAlgorithm_VALUES_TO_NAMES.find(val); + if (it != _EdgeInterpolationAlgorithm_VALUES_TO_NAMES.end()) { + out << it->second; + } else { + out << static_cast(val); + } + return out; +} + +std::string to_string(const EdgeInterpolationAlgorithm::type& val) { + std::map::const_iterator it = _EdgeInterpolationAlgorithm_VALUES_TO_NAMES.find(val); + if (it != _EdgeInterpolationAlgorithm_VALUES_TO_NAMES.end()) { + return std::string(it->second); + } else { + return std::to_string(static_cast(val)); + } +} + int _kEncodingValues[] = { /** * Default encoding. @@ -421,12 +456,15 @@ int _kEncodingValues[] = { */ Encoding::RLE_DICTIONARY, /** - * Encoding for floating-point data. + * Encoding for fixed-width data (FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY). * K byte-streams are created where K is the size in bytes of the data type. 
- * The individual bytes of an FP value are scattered to the corresponding stream and + * The individual bytes of a value are scattered to the corresponding stream and * the streams are concatenated. * This itself does not reduce the size of the data but can lead to better compression * afterwards. + * + * Added in 2.8 for FLOAT and DOUBLE. + * Support for INT32, INT64 and FIXED_LEN_BYTE_ARRAY added in 2.11. */ Encoding::BYTE_STREAM_SPLIT }; @@ -479,12 +517,15 @@ const char* _kEncodingNames[] = { */ "RLE_DICTIONARY", /** - * Encoding for floating-point data. + * Encoding for fixed-width data (FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY). * K byte-streams are created where K is the size in bytes of the data type. - * The individual bytes of an FP value are scattered to the corresponding stream and + * The individual bytes of a value are scattered to the corresponding stream and * the streams are concatenated. * This itself does not reduce the size of the data but can lead to better compression * afterwards. + * + * Added in 2.8 for FLOAT and DOUBLE. + * Support for INT32, INT64 and FIXED_LEN_BYTE_ARRAY added in 2.11. 
*/ "BYTE_STREAM_SPLIT" }; @@ -618,6 +659,9 @@ std::string to_string(const BoundaryOrder::type& val) { SizeStatistics::~SizeStatistics() noexcept { } +SizeStatistics::SizeStatistics() noexcept + : unencoded_byte_array_data_bytes(0) { +} void SizeStatistics::__set_unencoded_byte_array_data_bytes(const int64_t val) { this->unencoded_byte_array_data_bytes = val; @@ -648,6 +692,23 @@ void swap(SizeStatistics &a, SizeStatistics &b) { swap(a.__isset, b.__isset); } +bool SizeStatistics::operator==(const SizeStatistics & rhs) const +{ + if (__isset.unencoded_byte_array_data_bytes != rhs.__isset.unencoded_byte_array_data_bytes) + return false; + else if (__isset.unencoded_byte_array_data_bytes && !(unencoded_byte_array_data_bytes == rhs.unencoded_byte_array_data_bytes)) + return false; + if (__isset.repetition_level_histogram != rhs.__isset.repetition_level_histogram) + return false; + else if (__isset.repetition_level_histogram && !(repetition_level_histogram == rhs.repetition_level_histogram)) + return false; + if (__isset.definition_level_histogram != rhs.__isset.definition_level_histogram) + return false; + else if (__isset.definition_level_histogram && !(definition_level_histogram == rhs.definition_level_histogram)) + return false; + return true; +} + SizeStatistics::SizeStatistics(const SizeStatistics& other12) { unencoded_byte_array_data_bytes = other12.unencoded_byte_array_data_bytes; repetition_level_histogram = other12.repetition_level_histogram; @@ -684,9 +745,251 @@ void SizeStatistics::printTo(std::ostream& out) const { } +BoundingBox::~BoundingBox() noexcept { +} + +BoundingBox::BoundingBox() noexcept + : xmin(0), + xmax(0), + ymin(0), + ymax(0), + zmin(0), + zmax(0), + mmin(0), + mmax(0) { +} + +void BoundingBox::__set_xmin(const double val) { + this->xmin = val; +} + +void BoundingBox::__set_xmax(const double val) { + this->xmax = val; +} + +void BoundingBox::__set_ymin(const double val) { + this->ymin = val; +} + +void BoundingBox::__set_ymax(const double 
val) { + this->ymax = val; +} + +void BoundingBox::__set_zmin(const double val) { + this->zmin = val; +__isset.zmin = true; +} + +void BoundingBox::__set_zmax(const double val) { + this->zmax = val; +__isset.zmax = true; +} + +void BoundingBox::__set_mmin(const double val) { + this->mmin = val; +__isset.mmin = true; +} + +void BoundingBox::__set_mmax(const double val) { + this->mmax = val; +__isset.mmax = true; +} +std::ostream& operator<<(std::ostream& out, const BoundingBox& obj) +{ + obj.printTo(out); + return out; +} + + +void swap(BoundingBox &a, BoundingBox &b) { + using ::std::swap; + swap(a.xmin, b.xmin); + swap(a.xmax, b.xmax); + swap(a.ymin, b.ymin); + swap(a.ymax, b.ymax); + swap(a.zmin, b.zmin); + swap(a.zmax, b.zmax); + swap(a.mmin, b.mmin); + swap(a.mmax, b.mmax); + swap(a.__isset, b.__isset); +} + +bool BoundingBox::operator==(const BoundingBox & rhs) const +{ + if (!(xmin == rhs.xmin)) + return false; + if (!(xmax == rhs.xmax)) + return false; + if (!(ymin == rhs.ymin)) + return false; + if (!(ymax == rhs.ymax)) + return false; + if (__isset.zmin != rhs.__isset.zmin) + return false; + else if (__isset.zmin && !(zmin == rhs.zmin)) + return false; + if (__isset.zmax != rhs.__isset.zmax) + return false; + else if (__isset.zmax && !(zmax == rhs.zmax)) + return false; + if (__isset.mmin != rhs.__isset.mmin) + return false; + else if (__isset.mmin && !(mmin == rhs.mmin)) + return false; + if (__isset.mmax != rhs.__isset.mmax) + return false; + else if (__isset.mmax && !(mmax == rhs.mmax)) + return false; + return true; +} + +BoundingBox::BoundingBox(const BoundingBox& other16) noexcept { + xmin = other16.xmin; + xmax = other16.xmax; + ymin = other16.ymin; + ymax = other16.ymax; + zmin = other16.zmin; + zmax = other16.zmax; + mmin = other16.mmin; + mmax = other16.mmax; + __isset = other16.__isset; +} +BoundingBox::BoundingBox(BoundingBox&& other17) noexcept { + xmin = other17.xmin; + xmax = other17.xmax; + ymin = other17.ymin; + ymax = other17.ymax; + zmin 
= other17.zmin; + zmax = other17.zmax; + mmin = other17.mmin; + mmax = other17.mmax; + __isset = other17.__isset; +} +BoundingBox& BoundingBox::operator=(const BoundingBox& other18) noexcept { + xmin = other18.xmin; + xmax = other18.xmax; + ymin = other18.ymin; + ymax = other18.ymax; + zmin = other18.zmin; + zmax = other18.zmax; + mmin = other18.mmin; + mmax = other18.mmax; + __isset = other18.__isset; + return *this; +} +BoundingBox& BoundingBox::operator=(BoundingBox&& other19) noexcept { + xmin = other19.xmin; + xmax = other19.xmax; + ymin = other19.ymin; + ymax = other19.ymax; + zmin = other19.zmin; + zmax = other19.zmax; + mmin = other19.mmin; + mmax = other19.mmax; + __isset = other19.__isset; + return *this; +} +void BoundingBox::printTo(std::ostream& out) const { + using ::apache::thrift::to_string; + out << "BoundingBox("; + out << "xmin=" << to_string(xmin); + out << ", " << "xmax=" << to_string(xmax); + out << ", " << "ymin=" << to_string(ymin); + out << ", " << "ymax=" << to_string(ymax); + out << ", " << "zmin="; (__isset.zmin ? (out << to_string(zmin)) : (out << "")); + out << ", " << "zmax="; (__isset.zmax ? (out << to_string(zmax)) : (out << "")); + out << ", " << "mmin="; (__isset.mmin ? (out << to_string(mmin)) : (out << "")); + out << ", " << "mmax="; (__isset.mmax ? 
(out << to_string(mmax)) : (out << "")); + out << ")"; +} + + +GeospatialStatistics::~GeospatialStatistics() noexcept { +} + +GeospatialStatistics::GeospatialStatistics() noexcept { +} + +void GeospatialStatistics::__set_bbox(const BoundingBox& val) { + this->bbox = val; +__isset.bbox = true; +} + +void GeospatialStatistics::__set_geospatial_types(const std::vector & val) { + this->geospatial_types = val; +__isset.geospatial_types = true; +} +std::ostream& operator<<(std::ostream& out, const GeospatialStatistics& obj) +{ + obj.printTo(out); + return out; +} + + +void swap(GeospatialStatistics &a, GeospatialStatistics &b) { + using ::std::swap; + swap(a.bbox, b.bbox); + swap(a.geospatial_types, b.geospatial_types); + swap(a.__isset, b.__isset); +} + +bool GeospatialStatistics::operator==(const GeospatialStatistics & rhs) const +{ + if (__isset.bbox != rhs.__isset.bbox) + return false; + else if (__isset.bbox && !(bbox == rhs.bbox)) + return false; + if (__isset.geospatial_types != rhs.__isset.geospatial_types) + return false; + else if (__isset.geospatial_types && !(geospatial_types == rhs.geospatial_types)) + return false; + return true; +} + +GeospatialStatistics::GeospatialStatistics(const GeospatialStatistics& other26) { + bbox = other26.bbox; + geospatial_types = other26.geospatial_types; + __isset = other26.__isset; +} +GeospatialStatistics::GeospatialStatistics(GeospatialStatistics&& other27) noexcept { + bbox = std::move(other27.bbox); + geospatial_types = std::move(other27.geospatial_types); + __isset = other27.__isset; +} +GeospatialStatistics& GeospatialStatistics::operator=(const GeospatialStatistics& other28) { + bbox = other28.bbox; + geospatial_types = other28.geospatial_types; + __isset = other28.__isset; + return *this; +} +GeospatialStatistics& GeospatialStatistics::operator=(GeospatialStatistics&& other29) noexcept { + bbox = std::move(other29.bbox); + geospatial_types = std::move(other29.geospatial_types); + __isset = other29.__isset; + return 
*this; +} +void GeospatialStatistics::printTo(std::ostream& out) const { + using ::apache::thrift::to_string; + out << "GeospatialStatistics("; + out << "bbox="; (__isset.bbox ? (out << to_string(bbox)) : (out << "")); + out << ", " << "geospatial_types="; (__isset.geospatial_types ? (out << to_string(geospatial_types)) : (out << "")); + out << ")"; +} + + Statistics::~Statistics() noexcept { } +Statistics::Statistics() noexcept + : max(), + min(), + null_count(0), + distinct_count(0), + max_value(), + min_value(), + is_max_value_exact(0), + is_min_value_exact(0) { +} void Statistics::__set_max(const std::string& val) { this->max = val; @@ -747,50 +1050,87 @@ void swap(Statistics &a, Statistics &b) { swap(a.__isset, b.__isset); } -Statistics::Statistics(const Statistics& other16) { - max = other16.max; - min = other16.min; - null_count = other16.null_count; - distinct_count = other16.distinct_count; - max_value = other16.max_value; - min_value = other16.min_value; - is_max_value_exact = other16.is_max_value_exact; - is_min_value_exact = other16.is_min_value_exact; - __isset = other16.__isset; -} -Statistics::Statistics(Statistics&& other17) noexcept { - max = std::move(other17.max); - min = std::move(other17.min); - null_count = other17.null_count; - distinct_count = other17.distinct_count; - max_value = std::move(other17.max_value); - min_value = std::move(other17.min_value); - is_max_value_exact = other17.is_max_value_exact; - is_min_value_exact = other17.is_min_value_exact; - __isset = other17.__isset; -} -Statistics& Statistics::operator=(const Statistics& other18) { - max = other18.max; - min = other18.min; - null_count = other18.null_count; - distinct_count = other18.distinct_count; - max_value = other18.max_value; - min_value = other18.min_value; - is_max_value_exact = other18.is_max_value_exact; - is_min_value_exact = other18.is_min_value_exact; - __isset = other18.__isset; +bool Statistics::operator==(const Statistics & rhs) const +{ + if (__isset.max != 
rhs.__isset.max) + return false; + else if (__isset.max && !(max == rhs.max)) + return false; + if (__isset.min != rhs.__isset.min) + return false; + else if (__isset.min && !(min == rhs.min)) + return false; + if (__isset.null_count != rhs.__isset.null_count) + return false; + else if (__isset.null_count && !(null_count == rhs.null_count)) + return false; + if (__isset.distinct_count != rhs.__isset.distinct_count) + return false; + else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count)) + return false; + if (__isset.max_value != rhs.__isset.max_value) + return false; + else if (__isset.max_value && !(max_value == rhs.max_value)) + return false; + if (__isset.min_value != rhs.__isset.min_value) + return false; + else if (__isset.min_value && !(min_value == rhs.min_value)) + return false; + if (__isset.is_max_value_exact != rhs.__isset.is_max_value_exact) + return false; + else if (__isset.is_max_value_exact && !(is_max_value_exact == rhs.is_max_value_exact)) + return false; + if (__isset.is_min_value_exact != rhs.__isset.is_min_value_exact) + return false; + else if (__isset.is_min_value_exact && !(is_min_value_exact == rhs.is_min_value_exact)) + return false; + return true; +} + +Statistics::Statistics(const Statistics& other30) { + max = other30.max; + min = other30.min; + null_count = other30.null_count; + distinct_count = other30.distinct_count; + max_value = other30.max_value; + min_value = other30.min_value; + is_max_value_exact = other30.is_max_value_exact; + is_min_value_exact = other30.is_min_value_exact; + __isset = other30.__isset; +} +Statistics::Statistics(Statistics&& other31) noexcept { + max = std::move(other31.max); + min = std::move(other31.min); + null_count = other31.null_count; + distinct_count = other31.distinct_count; + max_value = std::move(other31.max_value); + min_value = std::move(other31.min_value); + is_max_value_exact = other31.is_max_value_exact; + is_min_value_exact = other31.is_min_value_exact; + __isset = 
other31.__isset; +} +Statistics& Statistics::operator=(const Statistics& other32) { + max = other32.max; + min = other32.min; + null_count = other32.null_count; + distinct_count = other32.distinct_count; + max_value = other32.max_value; + min_value = other32.min_value; + is_max_value_exact = other32.is_max_value_exact; + is_min_value_exact = other32.is_min_value_exact; + __isset = other32.__isset; return *this; } -Statistics& Statistics::operator=(Statistics&& other19) noexcept { - max = std::move(other19.max); - min = std::move(other19.min); - null_count = other19.null_count; - distinct_count = other19.distinct_count; - max_value = std::move(other19.max_value); - min_value = std::move(other19.min_value); - is_max_value_exact = other19.is_max_value_exact; - is_min_value_exact = other19.is_min_value_exact; - __isset = other19.__isset; +Statistics& Statistics::operator=(Statistics&& other33) noexcept { + max = std::move(other33.max); + min = std::move(other33.min); + null_count = other33.null_count; + distinct_count = other33.distinct_count; + max_value = std::move(other33.max_value); + min_value = std::move(other33.min_value); + is_max_value_exact = other33.is_max_value_exact; + is_min_value_exact = other33.is_min_value_exact; + __isset = other33.__isset; return *this; } void Statistics::printTo(std::ostream& out) const { @@ -811,6 +1151,8 @@ void Statistics::printTo(std::ostream& out) const { StringType::~StringType() noexcept { } +StringType::StringType() noexcept { +} std::ostream& operator<<(std::ostream& out, const StringType& obj) { obj.printTo(out); @@ -824,18 +1166,23 @@ void swap(StringType &a, StringType &b) { (void) b; } -StringType::StringType(const StringType& other20) noexcept { - (void) other20; +bool StringType::operator==(const StringType & /* rhs */) const +{ + return true; +} + +StringType::StringType(const StringType& other34) noexcept { + (void) other34; } -StringType::StringType(StringType&& other21) noexcept { - (void) other21; 
+StringType::StringType(StringType&& other35) noexcept { + (void) other35; } -StringType& StringType::operator=(const StringType& other22) noexcept { - (void) other22; +StringType& StringType::operator=(const StringType& other36) noexcept { + (void) other36; return *this; } -StringType& StringType::operator=(StringType&& other23) noexcept { - (void) other23; +StringType& StringType::operator=(StringType&& other37) noexcept { + (void) other37; return *this; } void StringType::printTo(std::ostream& out) const { @@ -848,6 +1195,8 @@ void StringType::printTo(std::ostream& out) const { UUIDType::~UUIDType() noexcept { } +UUIDType::UUIDType() noexcept { +} std::ostream& operator<<(std::ostream& out, const UUIDType& obj) { obj.printTo(out); @@ -861,18 +1210,23 @@ void swap(UUIDType &a, UUIDType &b) { (void) b; } -UUIDType::UUIDType(const UUIDType& other24) noexcept { - (void) other24; +bool UUIDType::operator==(const UUIDType & /* rhs */) const +{ + return true; +} + +UUIDType::UUIDType(const UUIDType& other38) noexcept { + (void) other38; } -UUIDType::UUIDType(UUIDType&& other25) noexcept { - (void) other25; +UUIDType::UUIDType(UUIDType&& other39) noexcept { + (void) other39; } -UUIDType& UUIDType::operator=(const UUIDType& other26) noexcept { - (void) other26; +UUIDType& UUIDType::operator=(const UUIDType& other40) noexcept { + (void) other40; return *this; } -UUIDType& UUIDType::operator=(UUIDType&& other27) noexcept { - (void) other27; +UUIDType& UUIDType::operator=(UUIDType&& other41) noexcept { + (void) other41; return *this; } void UUIDType::printTo(std::ostream& out) const { @@ -885,6 +1239,8 @@ void UUIDType::printTo(std::ostream& out) const { MapType::~MapType() noexcept { } +MapType::MapType() noexcept { +} std::ostream& operator<<(std::ostream& out, const MapType& obj) { obj.printTo(out); @@ -898,18 +1254,23 @@ void swap(MapType &a, MapType &b) { (void) b; } -MapType::MapType(const MapType& other28) noexcept { - (void) other28; +bool MapType::operator==(const 
MapType & /* rhs */) const +{ + return true; } -MapType::MapType(MapType&& other29) noexcept { - (void) other29; + +MapType::MapType(const MapType& other42) noexcept { + (void) other42; } -MapType& MapType::operator=(const MapType& other30) noexcept { - (void) other30; +MapType::MapType(MapType&& other43) noexcept { + (void) other43; +} +MapType& MapType::operator=(const MapType& other44) noexcept { + (void) other44; return *this; } -MapType& MapType::operator=(MapType&& other31) noexcept { - (void) other31; +MapType& MapType::operator=(MapType&& other45) noexcept { + (void) other45; return *this; } void MapType::printTo(std::ostream& out) const { @@ -922,6 +1283,8 @@ void MapType::printTo(std::ostream& out) const { ListType::~ListType() noexcept { } +ListType::ListType() noexcept { +} std::ostream& operator<<(std::ostream& out, const ListType& obj) { obj.printTo(out); @@ -935,18 +1298,23 @@ void swap(ListType &a, ListType &b) { (void) b; } -ListType::ListType(const ListType& other32) noexcept { - (void) other32; +bool ListType::operator==(const ListType & /* rhs */) const +{ + return true; +} + +ListType::ListType(const ListType& other46) noexcept { + (void) other46; } -ListType::ListType(ListType&& other33) noexcept { - (void) other33; +ListType::ListType(ListType&& other47) noexcept { + (void) other47; } -ListType& ListType::operator=(const ListType& other34) noexcept { - (void) other34; +ListType& ListType::operator=(const ListType& other48) noexcept { + (void) other48; return *this; } -ListType& ListType::operator=(ListType&& other35) noexcept { - (void) other35; +ListType& ListType::operator=(ListType&& other49) noexcept { + (void) other49; return *this; } void ListType::printTo(std::ostream& out) const { @@ -959,6 +1327,8 @@ void ListType::printTo(std::ostream& out) const { EnumType::~EnumType() noexcept { } +EnumType::EnumType() noexcept { +} std::ostream& operator<<(std::ostream& out, const EnumType& obj) { obj.printTo(out); @@ -972,18 +1342,23 @@ void 
swap(EnumType &a, EnumType &b) { (void) b; } -EnumType::EnumType(const EnumType& other36) noexcept { - (void) other36; +bool EnumType::operator==(const EnumType & /* rhs */) const +{ + return true; } -EnumType::EnumType(EnumType&& other37) noexcept { - (void) other37; + +EnumType::EnumType(const EnumType& other50) noexcept { + (void) other50; } -EnumType& EnumType::operator=(const EnumType& other38) noexcept { - (void) other38; +EnumType::EnumType(EnumType&& other51) noexcept { + (void) other51; +} +EnumType& EnumType::operator=(const EnumType& other52) noexcept { + (void) other52; return *this; } -EnumType& EnumType::operator=(EnumType&& other39) noexcept { - (void) other39; +EnumType& EnumType::operator=(EnumType&& other53) noexcept { + (void) other53; return *this; } void EnumType::printTo(std::ostream& out) const { @@ -996,6 +1371,8 @@ void EnumType::printTo(std::ostream& out) const { DateType::~DateType() noexcept { } +DateType::DateType() noexcept { +} std::ostream& operator<<(std::ostream& out, const DateType& obj) { obj.printTo(out); @@ -1009,18 +1386,23 @@ void swap(DateType &a, DateType &b) { (void) b; } -DateType::DateType(const DateType& other40) noexcept { - (void) other40; +bool DateType::operator==(const DateType & /* rhs */) const +{ + return true; } -DateType::DateType(DateType&& other41) noexcept { - (void) other41; + +DateType::DateType(const DateType& other54) noexcept { + (void) other54; } -DateType& DateType::operator=(const DateType& other42) noexcept { - (void) other42; +DateType::DateType(DateType&& other55) noexcept { + (void) other55; +} +DateType& DateType::operator=(const DateType& other56) noexcept { + (void) other56; return *this; } -DateType& DateType::operator=(DateType&& other43) noexcept { - (void) other43; +DateType& DateType::operator=(DateType&& other57) noexcept { + (void) other57; return *this; } void DateType::printTo(std::ostream& out) const { @@ -1033,6 +1415,8 @@ void DateType::printTo(std::ostream& out) const { 
Float16Type::~Float16Type() noexcept { } +Float16Type::Float16Type() noexcept { +} std::ostream& operator<<(std::ostream& out, const Float16Type& obj) { obj.printTo(out); @@ -1046,18 +1430,23 @@ void swap(Float16Type &a, Float16Type &b) { (void) b; } -Float16Type::Float16Type(const Float16Type& other44) noexcept { - (void) other44; +bool Float16Type::operator==(const Float16Type & /* rhs */) const +{ + return true; } -Float16Type::Float16Type(Float16Type&& other45) noexcept { - (void) other45; + +Float16Type::Float16Type(const Float16Type& other58) noexcept { + (void) other58; } -Float16Type& Float16Type::operator=(const Float16Type& other46) noexcept { - (void) other46; +Float16Type::Float16Type(Float16Type&& other59) noexcept { + (void) other59; +} +Float16Type& Float16Type::operator=(const Float16Type& other60) noexcept { + (void) other60; return *this; } -Float16Type& Float16Type::operator=(Float16Type&& other47) noexcept { - (void) other47; +Float16Type& Float16Type::operator=(Float16Type&& other61) noexcept { + (void) other61; return *this; } void Float16Type::printTo(std::ostream& out) const { @@ -1070,6 +1459,8 @@ void Float16Type::printTo(std::ostream& out) const { NullType::~NullType() noexcept { } +NullType::NullType() noexcept { +} std::ostream& operator<<(std::ostream& out, const NullType& obj) { obj.printTo(out); @@ -1083,18 +1474,23 @@ void swap(NullType &a, NullType &b) { (void) b; } -NullType::NullType(const NullType& other48) noexcept { - (void) other48; +bool NullType::operator==(const NullType & /* rhs */) const +{ + return true; } -NullType::NullType(NullType&& other49) noexcept { - (void) other49; + +NullType::NullType(const NullType& other62) noexcept { + (void) other62; } -NullType& NullType::operator=(const NullType& other50) noexcept { - (void) other50; +NullType::NullType(NullType&& other63) noexcept { + (void) other63; +} +NullType& NullType::operator=(const NullType& other64) noexcept { + (void) other64; return *this; } -NullType& 
NullType::operator=(NullType&& other51) noexcept { - (void) other51; +NullType& NullType::operator=(NullType&& other65) noexcept { + (void) other65; return *this; } void NullType::printTo(std::ostream& out) const { @@ -1107,6 +1503,10 @@ void NullType::printTo(std::ostream& out) const { DecimalType::~DecimalType() noexcept { } +DecimalType::DecimalType() noexcept + : scale(0), + precision(0) { +} void DecimalType::__set_scale(const int32_t val) { this->scale = val; @@ -1128,22 +1528,31 @@ void swap(DecimalType &a, DecimalType &b) { swap(a.precision, b.precision); } -DecimalType::DecimalType(const DecimalType& other52) noexcept { - scale = other52.scale; - precision = other52.precision; +bool DecimalType::operator==(const DecimalType & rhs) const +{ + if (!(scale == rhs.scale)) + return false; + if (!(precision == rhs.precision)) + return false; + return true; +} + +DecimalType::DecimalType(const DecimalType& other66) noexcept { + scale = other66.scale; + precision = other66.precision; } -DecimalType::DecimalType(DecimalType&& other53) noexcept { - scale = other53.scale; - precision = other53.precision; +DecimalType::DecimalType(DecimalType&& other67) noexcept { + scale = other67.scale; + precision = other67.precision; } -DecimalType& DecimalType::operator=(const DecimalType& other54) noexcept { - scale = other54.scale; - precision = other54.precision; +DecimalType& DecimalType::operator=(const DecimalType& other68) noexcept { + scale = other68.scale; + precision = other68.precision; return *this; } -DecimalType& DecimalType::operator=(DecimalType&& other55) noexcept { - scale = other55.scale; - precision = other55.precision; +DecimalType& DecimalType::operator=(DecimalType&& other69) noexcept { + scale = other69.scale; + precision = other69.precision; return *this; } void DecimalType::printTo(std::ostream& out) const { @@ -1158,6 +1567,8 @@ void DecimalType::printTo(std::ostream& out) const { MilliSeconds::~MilliSeconds() noexcept { } +MilliSeconds::MilliSeconds() 
noexcept { +} std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj) { obj.printTo(out); @@ -1171,18 +1582,23 @@ void swap(MilliSeconds &a, MilliSeconds &b) { (void) b; } -MilliSeconds::MilliSeconds(const MilliSeconds& other56) noexcept { - (void) other56; +bool MilliSeconds::operator==(const MilliSeconds & /* rhs */) const +{ + return true; } -MilliSeconds::MilliSeconds(MilliSeconds&& other57) noexcept { - (void) other57; + +MilliSeconds::MilliSeconds(const MilliSeconds& other70) noexcept { + (void) other70; } -MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other58) noexcept { - (void) other58; +MilliSeconds::MilliSeconds(MilliSeconds&& other71) noexcept { + (void) other71; +} +MilliSeconds& MilliSeconds::operator=(const MilliSeconds& other72) noexcept { + (void) other72; return *this; } -MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other59) noexcept { - (void) other59; +MilliSeconds& MilliSeconds::operator=(MilliSeconds&& other73) noexcept { + (void) other73; return *this; } void MilliSeconds::printTo(std::ostream& out) const { @@ -1195,6 +1611,8 @@ void MilliSeconds::printTo(std::ostream& out) const { MicroSeconds::~MicroSeconds() noexcept { } +MicroSeconds::MicroSeconds() noexcept { +} std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj) { obj.printTo(out); @@ -1208,18 +1626,23 @@ void swap(MicroSeconds &a, MicroSeconds &b) { (void) b; } -MicroSeconds::MicroSeconds(const MicroSeconds& other60) noexcept { - (void) other60; +bool MicroSeconds::operator==(const MicroSeconds & /* rhs */) const +{ + return true; } -MicroSeconds::MicroSeconds(MicroSeconds&& other61) noexcept { - (void) other61; + +MicroSeconds::MicroSeconds(const MicroSeconds& other74) noexcept { + (void) other74; } -MicroSeconds& MicroSeconds::operator=(const MicroSeconds& other62) noexcept { - (void) other62; +MicroSeconds::MicroSeconds(MicroSeconds&& other75) noexcept { + (void) other75; +} +MicroSeconds& MicroSeconds::operator=(const MicroSeconds& 
other76) noexcept { + (void) other76; return *this; } -MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other63) noexcept { - (void) other63; +MicroSeconds& MicroSeconds::operator=(MicroSeconds&& other77) noexcept { + (void) other77; return *this; } void MicroSeconds::printTo(std::ostream& out) const { @@ -1232,6 +1655,8 @@ void MicroSeconds::printTo(std::ostream& out) const { NanoSeconds::~NanoSeconds() noexcept { } +NanoSeconds::NanoSeconds() noexcept { +} std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj) { obj.printTo(out); @@ -1245,18 +1670,23 @@ void swap(NanoSeconds &a, NanoSeconds &b) { (void) b; } -NanoSeconds::NanoSeconds(const NanoSeconds& other64) noexcept { - (void) other64; +bool NanoSeconds::operator==(const NanoSeconds & /* rhs */) const +{ + return true; } -NanoSeconds::NanoSeconds(NanoSeconds&& other65) noexcept { - (void) other65; + +NanoSeconds::NanoSeconds(const NanoSeconds& other78) noexcept { + (void) other78; +} +NanoSeconds::NanoSeconds(NanoSeconds&& other79) noexcept { + (void) other79; } -NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other66) noexcept { - (void) other66; +NanoSeconds& NanoSeconds::operator=(const NanoSeconds& other80) noexcept { + (void) other80; return *this; } -NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other67) noexcept { - (void) other67; +NanoSeconds& NanoSeconds::operator=(NanoSeconds&& other81) noexcept { + (void) other81; return *this; } void NanoSeconds::printTo(std::ostream& out) const { @@ -1269,6 +1699,8 @@ void NanoSeconds::printTo(std::ostream& out) const { TimeUnit::~TimeUnit() noexcept { } +TimeUnit::TimeUnit() noexcept { +} void TimeUnit::__set_MILLIS(const MilliSeconds& val) { this->MILLIS = val; @@ -1299,30 +1731,47 @@ void swap(TimeUnit &a, TimeUnit &b) { swap(a.__isset, b.__isset); } -TimeUnit::TimeUnit(const TimeUnit& other68) noexcept { - MILLIS = other68.MILLIS; - MICROS = other68.MICROS; - NANOS = other68.NANOS; - __isset = other68.__isset; -} 
-TimeUnit::TimeUnit(TimeUnit&& other69) noexcept { - MILLIS = std::move(other69.MILLIS); - MICROS = std::move(other69.MICROS); - NANOS = std::move(other69.NANOS); - __isset = other69.__isset; -} -TimeUnit& TimeUnit::operator=(const TimeUnit& other70) noexcept { - MILLIS = other70.MILLIS; - MICROS = other70.MICROS; - NANOS = other70.NANOS; - __isset = other70.__isset; +bool TimeUnit::operator==(const TimeUnit & rhs) const +{ + if (__isset.MILLIS != rhs.__isset.MILLIS) + return false; + else if (__isset.MILLIS && !(MILLIS == rhs.MILLIS)) + return false; + if (__isset.MICROS != rhs.__isset.MICROS) + return false; + else if (__isset.MICROS && !(MICROS == rhs.MICROS)) + return false; + if (__isset.NANOS != rhs.__isset.NANOS) + return false; + else if (__isset.NANOS && !(NANOS == rhs.NANOS)) + return false; + return true; +} + +TimeUnit::TimeUnit(const TimeUnit& other82) noexcept { + MILLIS = other82.MILLIS; + MICROS = other82.MICROS; + NANOS = other82.NANOS; + __isset = other82.__isset; +} +TimeUnit::TimeUnit(TimeUnit&& other83) noexcept { + MILLIS = std::move(other83.MILLIS); + MICROS = std::move(other83.MICROS); + NANOS = std::move(other83.NANOS); + __isset = other83.__isset; +} +TimeUnit& TimeUnit::operator=(const TimeUnit& other84) noexcept { + MILLIS = other84.MILLIS; + MICROS = other84.MICROS; + NANOS = other84.NANOS; + __isset = other84.__isset; return *this; } -TimeUnit& TimeUnit::operator=(TimeUnit&& other71) noexcept { - MILLIS = std::move(other71.MILLIS); - MICROS = std::move(other71.MICROS); - NANOS = std::move(other71.NANOS); - __isset = other71.__isset; +TimeUnit& TimeUnit::operator=(TimeUnit&& other85) noexcept { + MILLIS = std::move(other85.MILLIS); + MICROS = std::move(other85.MICROS); + NANOS = std::move(other85.NANOS); + __isset = other85.__isset; return *this; } void TimeUnit::printTo(std::ostream& out) const { @@ -1338,6 +1787,9 @@ void TimeUnit::printTo(std::ostream& out) const { TimestampType::~TimestampType() noexcept { } 
+TimestampType::TimestampType() noexcept + : isAdjustedToUTC(0) { +} void TimestampType::__set_isAdjustedToUTC(const bool val) { this->isAdjustedToUTC = val; @@ -1359,22 +1811,31 @@ void swap(TimestampType &a, TimestampType &b) { swap(a.unit, b.unit); } -TimestampType::TimestampType(const TimestampType& other72) noexcept { - isAdjustedToUTC = other72.isAdjustedToUTC; - unit = other72.unit; +bool TimestampType::operator==(const TimestampType & rhs) const +{ + if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) + return false; + if (!(unit == rhs.unit)) + return false; + return true; } -TimestampType::TimestampType(TimestampType&& other73) noexcept { - isAdjustedToUTC = other73.isAdjustedToUTC; - unit = std::move(other73.unit); + +TimestampType::TimestampType(const TimestampType& other86) noexcept { + isAdjustedToUTC = other86.isAdjustedToUTC; + unit = other86.unit; +} +TimestampType::TimestampType(TimestampType&& other87) noexcept { + isAdjustedToUTC = other87.isAdjustedToUTC; + unit = std::move(other87.unit); } -TimestampType& TimestampType::operator=(const TimestampType& other74) noexcept { - isAdjustedToUTC = other74.isAdjustedToUTC; - unit = other74.unit; +TimestampType& TimestampType::operator=(const TimestampType& other88) noexcept { + isAdjustedToUTC = other88.isAdjustedToUTC; + unit = other88.unit; return *this; } -TimestampType& TimestampType::operator=(TimestampType&& other75) noexcept { - isAdjustedToUTC = other75.isAdjustedToUTC; - unit = std::move(other75.unit); +TimestampType& TimestampType::operator=(TimestampType&& other89) noexcept { + isAdjustedToUTC = other89.isAdjustedToUTC; + unit = std::move(other89.unit); return *this; } void TimestampType::printTo(std::ostream& out) const { @@ -1389,6 +1850,9 @@ void TimestampType::printTo(std::ostream& out) const { TimeType::~TimeType() noexcept { } +TimeType::TimeType() noexcept + : isAdjustedToUTC(0) { +} void TimeType::__set_isAdjustedToUTC(const bool val) { this->isAdjustedToUTC = val; @@ -1410,22 +1874,31 @@ 
void swap(TimeType &a, TimeType &b) { swap(a.unit, b.unit); } -TimeType::TimeType(const TimeType& other76) noexcept { - isAdjustedToUTC = other76.isAdjustedToUTC; - unit = other76.unit; +bool TimeType::operator==(const TimeType & rhs) const +{ + if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) + return false; + if (!(unit == rhs.unit)) + return false; + return true; } -TimeType::TimeType(TimeType&& other77) noexcept { - isAdjustedToUTC = other77.isAdjustedToUTC; - unit = std::move(other77.unit); + +TimeType::TimeType(const TimeType& other90) noexcept { + isAdjustedToUTC = other90.isAdjustedToUTC; + unit = other90.unit; +} +TimeType::TimeType(TimeType&& other91) noexcept { + isAdjustedToUTC = other91.isAdjustedToUTC; + unit = std::move(other91.unit); } -TimeType& TimeType::operator=(const TimeType& other78) noexcept { - isAdjustedToUTC = other78.isAdjustedToUTC; - unit = other78.unit; +TimeType& TimeType::operator=(const TimeType& other92) noexcept { + isAdjustedToUTC = other92.isAdjustedToUTC; + unit = other92.unit; return *this; } -TimeType& TimeType::operator=(TimeType&& other79) noexcept { - isAdjustedToUTC = other79.isAdjustedToUTC; - unit = std::move(other79.unit); +TimeType& TimeType::operator=(TimeType&& other93) noexcept { + isAdjustedToUTC = other93.isAdjustedToUTC; + unit = std::move(other93.unit); return *this; } void TimeType::printTo(std::ostream& out) const { @@ -1440,6 +1913,10 @@ void TimeType::printTo(std::ostream& out) const { IntType::~IntType() noexcept { } +IntType::IntType() noexcept + : bitWidth(0), + isSigned(0) { +} void IntType::__set_bitWidth(const int8_t val) { this->bitWidth = val; @@ -1461,22 +1938,31 @@ void swap(IntType &a, IntType &b) { swap(a.isSigned, b.isSigned); } -IntType::IntType(const IntType& other80) noexcept { - bitWidth = other80.bitWidth; - isSigned = other80.isSigned; +bool IntType::operator==(const IntType & rhs) const +{ + if (!(bitWidth == rhs.bitWidth)) + return false; + if (!(isSigned == rhs.isSigned)) + return 
false; + return true; +} + +IntType::IntType(const IntType& other94) noexcept { + bitWidth = other94.bitWidth; + isSigned = other94.isSigned; } -IntType::IntType(IntType&& other81) noexcept { - bitWidth = other81.bitWidth; - isSigned = other81.isSigned; +IntType::IntType(IntType&& other95) noexcept { + bitWidth = other95.bitWidth; + isSigned = other95.isSigned; } -IntType& IntType::operator=(const IntType& other82) noexcept { - bitWidth = other82.bitWidth; - isSigned = other82.isSigned; +IntType& IntType::operator=(const IntType& other96) noexcept { + bitWidth = other96.bitWidth; + isSigned = other96.isSigned; return *this; } -IntType& IntType::operator=(IntType&& other83) noexcept { - bitWidth = other83.bitWidth; - isSigned = other83.isSigned; +IntType& IntType::operator=(IntType&& other97) noexcept { + bitWidth = other97.bitWidth; + isSigned = other97.isSigned; return *this; } void IntType::printTo(std::ostream& out) const { @@ -1491,6 +1977,8 @@ void IntType::printTo(std::ostream& out) const { JsonType::~JsonType() noexcept { } +JsonType::JsonType() noexcept { +} std::ostream& operator<<(std::ostream& out, const JsonType& obj) { obj.printTo(out); @@ -1504,18 +1992,23 @@ void swap(JsonType &a, JsonType &b) { (void) b; } -JsonType::JsonType(const JsonType& other84) noexcept { - (void) other84; +bool JsonType::operator==(const JsonType & /* rhs */) const +{ + return true; +} + +JsonType::JsonType(const JsonType& other98) noexcept { + (void) other98; } -JsonType::JsonType(JsonType&& other85) noexcept { - (void) other85; +JsonType::JsonType(JsonType&& other99) noexcept { + (void) other99; } -JsonType& JsonType::operator=(const JsonType& other86) noexcept { - (void) other86; +JsonType& JsonType::operator=(const JsonType& other100) noexcept { + (void) other100; return *this; } -JsonType& JsonType::operator=(JsonType&& other87) noexcept { - (void) other87; +JsonType& JsonType::operator=(JsonType&& other101) noexcept { + (void) other101; return *this; } void 
JsonType::printTo(std::ostream& out) const { @@ -1528,6 +2021,8 @@ void JsonType::printTo(std::ostream& out) const { BsonType::~BsonType() noexcept { } +BsonType::BsonType() noexcept { +} std::ostream& operator<<(std::ostream& out, const BsonType& obj) { obj.printTo(out); @@ -1541,18 +2036,23 @@ void swap(BsonType &a, BsonType &b) { (void) b; } -BsonType::BsonType(const BsonType& other88) noexcept { - (void) other88; +bool BsonType::operator==(const BsonType & /* rhs */) const +{ + return true; +} + +BsonType::BsonType(const BsonType& other102) noexcept { + (void) other102; } -BsonType::BsonType(BsonType&& other89) noexcept { - (void) other89; +BsonType::BsonType(BsonType&& other103) noexcept { + (void) other103; } -BsonType& BsonType::operator=(const BsonType& other90) noexcept { - (void) other90; +BsonType& BsonType::operator=(const BsonType& other104) noexcept { + (void) other104; return *this; } -BsonType& BsonType::operator=(BsonType&& other91) noexcept { - (void) other91; +BsonType& BsonType::operator=(BsonType&& other105) noexcept { + (void) other105; return *this; } void BsonType::printTo(std::ostream& out) const { @@ -1562,9 +2062,189 @@ void BsonType::printTo(std::ostream& out) const { } +VariantType::~VariantType() noexcept { +} + +VariantType::VariantType() noexcept { +} +std::ostream& operator<<(std::ostream& out, const VariantType& obj) +{ + obj.printTo(out); + return out; +} + + +void swap(VariantType &a, VariantType &b) { + using ::std::swap; + (void) a; + (void) b; +} + +bool VariantType::operator==(const VariantType & /* rhs */) const +{ + return true; +} + +VariantType::VariantType(const VariantType& other106) noexcept { + (void) other106; +} +VariantType::VariantType(VariantType&& other107) noexcept { + (void) other107; +} +VariantType& VariantType::operator=(const VariantType& other108) noexcept { + (void) other108; + return *this; +} +VariantType& VariantType::operator=(VariantType&& other109) noexcept { + (void) other109; + return *this; +} 
+void VariantType::printTo(std::ostream& out) const { + using ::apache::thrift::to_string; + out << "VariantType("; + out << ")"; +} + + +GeometryType::~GeometryType() noexcept { +} + +GeometryType::GeometryType() noexcept + : crs() { +} + +void GeometryType::__set_crs(const std::string& val) { + this->crs = val; +__isset.crs = true; +} +std::ostream& operator<<(std::ostream& out, const GeometryType& obj) +{ + obj.printTo(out); + return out; +} + + +void swap(GeometryType &a, GeometryType &b) { + using ::std::swap; + swap(a.crs, b.crs); + swap(a.__isset, b.__isset); +} + +bool GeometryType::operator==(const GeometryType & rhs) const +{ + if (__isset.crs != rhs.__isset.crs) + return false; + else if (__isset.crs && !(crs == rhs.crs)) + return false; + return true; +} + +GeometryType::GeometryType(const GeometryType& other110) { + crs = other110.crs; + __isset = other110.__isset; +} +GeometryType::GeometryType(GeometryType&& other111) noexcept { + crs = std::move(other111.crs); + __isset = other111.__isset; +} +GeometryType& GeometryType::operator=(const GeometryType& other112) { + crs = other112.crs; + __isset = other112.__isset; + return *this; +} +GeometryType& GeometryType::operator=(GeometryType&& other113) noexcept { + crs = std::move(other113.crs); + __isset = other113.__isset; + return *this; +} +void GeometryType::printTo(std::ostream& out) const { + using ::apache::thrift::to_string; + out << "GeometryType("; + out << "crs="; (__isset.crs ? 
(out << to_string(crs)) : (out << "")); + out << ")"; +} + + +GeographyType::~GeographyType() noexcept { +} + +GeographyType::GeographyType() noexcept + : crs(), + algorithm(static_cast(0)) { +} + +void GeographyType::__set_crs(const std::string& val) { + this->crs = val; +__isset.crs = true; +} + +void GeographyType::__set_algorithm(const EdgeInterpolationAlgorithm::type val) { + this->algorithm = val; +__isset.algorithm = true; +} +std::ostream& operator<<(std::ostream& out, const GeographyType& obj) +{ + obj.printTo(out); + return out; +} + + +void swap(GeographyType &a, GeographyType &b) { + using ::std::swap; + swap(a.crs, b.crs); + swap(a.algorithm, b.algorithm); + swap(a.__isset, b.__isset); +} + +bool GeographyType::operator==(const GeographyType & rhs) const +{ + if (__isset.crs != rhs.__isset.crs) + return false; + else if (__isset.crs && !(crs == rhs.crs)) + return false; + if (__isset.algorithm != rhs.__isset.algorithm) + return false; + else if (__isset.algorithm && !(algorithm == rhs.algorithm)) + return false; + return true; +} + +GeographyType::GeographyType(const GeographyType& other115) { + crs = other115.crs; + algorithm = other115.algorithm; + __isset = other115.__isset; +} +GeographyType::GeographyType(GeographyType&& other116) noexcept { + crs = std::move(other116.crs); + algorithm = other116.algorithm; + __isset = other116.__isset; +} +GeographyType& GeographyType::operator=(const GeographyType& other117) { + crs = other117.crs; + algorithm = other117.algorithm; + __isset = other117.__isset; + return *this; +} +GeographyType& GeographyType::operator=(GeographyType&& other118) noexcept { + crs = std::move(other118.crs); + algorithm = other118.algorithm; + __isset = other118.__isset; + return *this; +} +void GeographyType::printTo(std::ostream& out) const { + using ::apache::thrift::to_string; + out << "GeographyType("; + out << "crs="; (__isset.crs ? (out << to_string(crs)) : (out << "")); + out << ", " << "algorithm="; (__isset.algorithm ? 
(out << to_string(algorithm)) : (out << "")); + out << ")"; +} + + LogicalType::~LogicalType() noexcept { } +LogicalType::LogicalType() noexcept { +} void LogicalType::__set_STRING(const StringType& val) { this->STRING = val; @@ -1635,6 +2315,21 @@ void LogicalType::__set_FLOAT16(const Float16Type& val) { this->FLOAT16 = val; __isset.FLOAT16 = true; } + +void LogicalType::__set_VARIANT(const VariantType& val) { + this->VARIANT = val; +__isset.VARIANT = true; +} + +void LogicalType::__set_GEOMETRY(const GeometryType& val) { + this->GEOMETRY = val; +__isset.GEOMETRY = true; +} + +void LogicalType::__set_GEOGRAPHY(const GeographyType& val) { + this->GEOGRAPHY = val; +__isset.GEOGRAPHY = true; +} std::ostream& operator<<(std::ostream& out, const LogicalType& obj) { obj.printTo(out); @@ -1658,77 +2353,165 @@ void swap(LogicalType &a, LogicalType &b) { swap(a.BSON, b.BSON); swap(a.UUID, b.UUID); swap(a.FLOAT16, b.FLOAT16); + swap(a.VARIANT, b.VARIANT); + swap(a.GEOMETRY, b.GEOMETRY); + swap(a.GEOGRAPHY, b.GEOGRAPHY); swap(a.__isset, b.__isset); } -LogicalType::LogicalType(const LogicalType& other92) noexcept { - STRING = other92.STRING; - MAP = other92.MAP; - LIST = other92.LIST; - ENUM = other92.ENUM; - DECIMAL = other92.DECIMAL; - DATE = other92.DATE; - TIME = other92.TIME; - TIMESTAMP = other92.TIMESTAMP; - INTEGER = other92.INTEGER; - UNKNOWN = other92.UNKNOWN; - JSON = other92.JSON; - BSON = other92.BSON; - UUID = other92.UUID; - FLOAT16 = other92.FLOAT16; - __isset = other92.__isset; -} -LogicalType::LogicalType(LogicalType&& other93) noexcept { - STRING = std::move(other93.STRING); - MAP = std::move(other93.MAP); - LIST = std::move(other93.LIST); - ENUM = std::move(other93.ENUM); - DECIMAL = std::move(other93.DECIMAL); - DATE = std::move(other93.DATE); - TIME = std::move(other93.TIME); - TIMESTAMP = std::move(other93.TIMESTAMP); - INTEGER = std::move(other93.INTEGER); - UNKNOWN = std::move(other93.UNKNOWN); - JSON = std::move(other93.JSON); - BSON = 
std::move(other93.BSON); - UUID = std::move(other93.UUID); - FLOAT16 = std::move(other93.FLOAT16); - __isset = other93.__isset; -} -LogicalType& LogicalType::operator=(const LogicalType& other94) noexcept { - STRING = other94.STRING; - MAP = other94.MAP; - LIST = other94.LIST; - ENUM = other94.ENUM; - DECIMAL = other94.DECIMAL; - DATE = other94.DATE; - TIME = other94.TIME; - TIMESTAMP = other94.TIMESTAMP; - INTEGER = other94.INTEGER; - UNKNOWN = other94.UNKNOWN; - JSON = other94.JSON; - BSON = other94.BSON; - UUID = other94.UUID; - FLOAT16 = other94.FLOAT16; - __isset = other94.__isset; +bool LogicalType::operator==(const LogicalType & rhs) const +{ + if (__isset.STRING != rhs.__isset.STRING) + return false; + else if (__isset.STRING && !(STRING == rhs.STRING)) + return false; + if (__isset.MAP != rhs.__isset.MAP) + return false; + else if (__isset.MAP && !(MAP == rhs.MAP)) + return false; + if (__isset.LIST != rhs.__isset.LIST) + return false; + else if (__isset.LIST && !(LIST == rhs.LIST)) + return false; + if (__isset.ENUM != rhs.__isset.ENUM) + return false; + else if (__isset.ENUM && !(ENUM == rhs.ENUM)) + return false; + if (__isset.DECIMAL != rhs.__isset.DECIMAL) + return false; + else if (__isset.DECIMAL && !(DECIMAL == rhs.DECIMAL)) + return false; + if (__isset.DATE != rhs.__isset.DATE) + return false; + else if (__isset.DATE && !(DATE == rhs.DATE)) + return false; + if (__isset.TIME != rhs.__isset.TIME) + return false; + else if (__isset.TIME && !(TIME == rhs.TIME)) + return false; + if (__isset.TIMESTAMP != rhs.__isset.TIMESTAMP) + return false; + else if (__isset.TIMESTAMP && !(TIMESTAMP == rhs.TIMESTAMP)) + return false; + if (__isset.INTEGER != rhs.__isset.INTEGER) + return false; + else if (__isset.INTEGER && !(INTEGER == rhs.INTEGER)) + return false; + if (__isset.UNKNOWN != rhs.__isset.UNKNOWN) + return false; + else if (__isset.UNKNOWN && !(UNKNOWN == rhs.UNKNOWN)) + return false; + if (__isset.JSON != rhs.__isset.JSON) + return false; + else if 
(__isset.JSON && !(JSON == rhs.JSON)) + return false; + if (__isset.BSON != rhs.__isset.BSON) + return false; + else if (__isset.BSON && !(BSON == rhs.BSON)) + return false; + if (__isset.UUID != rhs.__isset.UUID) + return false; + else if (__isset.UUID && !(UUID == rhs.UUID)) + return false; + if (__isset.FLOAT16 != rhs.__isset.FLOAT16) + return false; + else if (__isset.FLOAT16 && !(FLOAT16 == rhs.FLOAT16)) + return false; + if (__isset.VARIANT != rhs.__isset.VARIANT) + return false; + else if (__isset.VARIANT && !(VARIANT == rhs.VARIANT)) + return false; + if (__isset.GEOMETRY != rhs.__isset.GEOMETRY) + return false; + else if (__isset.GEOMETRY && !(GEOMETRY == rhs.GEOMETRY)) + return false; + if (__isset.GEOGRAPHY != rhs.__isset.GEOGRAPHY) + return false; + else if (__isset.GEOGRAPHY && !(GEOGRAPHY == rhs.GEOGRAPHY)) + return false; + return true; +} + +LogicalType::LogicalType(const LogicalType& other119) { + STRING = other119.STRING; + MAP = other119.MAP; + LIST = other119.LIST; + ENUM = other119.ENUM; + DECIMAL = other119.DECIMAL; + DATE = other119.DATE; + TIME = other119.TIME; + TIMESTAMP = other119.TIMESTAMP; + INTEGER = other119.INTEGER; + UNKNOWN = other119.UNKNOWN; + JSON = other119.JSON; + BSON = other119.BSON; + UUID = other119.UUID; + FLOAT16 = other119.FLOAT16; + VARIANT = other119.VARIANT; + GEOMETRY = other119.GEOMETRY; + GEOGRAPHY = other119.GEOGRAPHY; + __isset = other119.__isset; +} +LogicalType::LogicalType(LogicalType&& other120) noexcept { + STRING = std::move(other120.STRING); + MAP = std::move(other120.MAP); + LIST = std::move(other120.LIST); + ENUM = std::move(other120.ENUM); + DECIMAL = std::move(other120.DECIMAL); + DATE = std::move(other120.DATE); + TIME = std::move(other120.TIME); + TIMESTAMP = std::move(other120.TIMESTAMP); + INTEGER = std::move(other120.INTEGER); + UNKNOWN = std::move(other120.UNKNOWN); + JSON = std::move(other120.JSON); + BSON = std::move(other120.BSON); + UUID = std::move(other120.UUID); + FLOAT16 = 
std::move(other120.FLOAT16); + VARIANT = std::move(other120.VARIANT); + GEOMETRY = std::move(other120.GEOMETRY); + GEOGRAPHY = std::move(other120.GEOGRAPHY); + __isset = other120.__isset; +} +LogicalType& LogicalType::operator=(const LogicalType& other121) { + STRING = other121.STRING; + MAP = other121.MAP; + LIST = other121.LIST; + ENUM = other121.ENUM; + DECIMAL = other121.DECIMAL; + DATE = other121.DATE; + TIME = other121.TIME; + TIMESTAMP = other121.TIMESTAMP; + INTEGER = other121.INTEGER; + UNKNOWN = other121.UNKNOWN; + JSON = other121.JSON; + BSON = other121.BSON; + UUID = other121.UUID; + FLOAT16 = other121.FLOAT16; + VARIANT = other121.VARIANT; + GEOMETRY = other121.GEOMETRY; + GEOGRAPHY = other121.GEOGRAPHY; + __isset = other121.__isset; return *this; } -LogicalType& LogicalType::operator=(LogicalType&& other95) noexcept { - STRING = std::move(other95.STRING); - MAP = std::move(other95.MAP); - LIST = std::move(other95.LIST); - ENUM = std::move(other95.ENUM); - DECIMAL = std::move(other95.DECIMAL); - DATE = std::move(other95.DATE); - TIME = std::move(other95.TIME); - TIMESTAMP = std::move(other95.TIMESTAMP); - INTEGER = std::move(other95.INTEGER); - UNKNOWN = std::move(other95.UNKNOWN); - JSON = std::move(other95.JSON); - BSON = std::move(other95.BSON); - UUID = std::move(other95.UUID); - FLOAT16 = std::move(other95.FLOAT16); - __isset = other95.__isset; +LogicalType& LogicalType::operator=(LogicalType&& other122) noexcept { + STRING = std::move(other122.STRING); + MAP = std::move(other122.MAP); + LIST = std::move(other122.LIST); + ENUM = std::move(other122.ENUM); + DECIMAL = std::move(other122.DECIMAL); + DATE = std::move(other122.DATE); + TIME = std::move(other122.TIME); + TIMESTAMP = std::move(other122.TIMESTAMP); + INTEGER = std::move(other122.INTEGER); + UNKNOWN = std::move(other122.UNKNOWN); + JSON = std::move(other122.JSON); + BSON = std::move(other122.BSON); + UUID = std::move(other122.UUID); + FLOAT16 = std::move(other122.FLOAT16); + VARIANT = 
std::move(other122.VARIANT); + GEOMETRY = std::move(other122.GEOMETRY); + GEOGRAPHY = std::move(other122.GEOGRAPHY); + __isset = other122.__isset; return *this; } void LogicalType::printTo(std::ostream& out) const { @@ -1748,6 +2531,9 @@ void LogicalType::printTo(std::ostream& out) const { out << ", " << "BSON="; (__isset.BSON ? (out << to_string(BSON)) : (out << "")); out << ", " << "UUID="; (__isset.UUID ? (out << to_string(UUID)) : (out << "")); out << ", " << "FLOAT16="; (__isset.FLOAT16 ? (out << to_string(FLOAT16)) : (out << "")); + out << ", " << "VARIANT="; (__isset.VARIANT ? (out << to_string(VARIANT)) : (out << "")); + out << ", " << "GEOMETRY="; (__isset.GEOMETRY ? (out << to_string(GEOMETRY)) : (out << "")); + out << ", " << "GEOGRAPHY="; (__isset.GEOGRAPHY ? (out << to_string(GEOGRAPHY)) : (out << "")); out << ")"; } @@ -1755,6 +2541,17 @@ void LogicalType::printTo(std::ostream& out) const { SchemaElement::~SchemaElement() noexcept { } +SchemaElement::SchemaElement() noexcept + : type(static_cast(0)), + type_length(0), + repetition_type(static_cast(0)), + name(), + num_children(0), + converted_type(static_cast(0)), + scale(0), + precision(0), + field_id(0) { +} void SchemaElement::__set_type(const Type::type val) { this->type = val; @@ -1826,58 +2623,101 @@ void swap(SchemaElement &a, SchemaElement &b) { swap(a.__isset, b.__isset); } -SchemaElement::SchemaElement(const SchemaElement& other99) { - type = other99.type; - type_length = other99.type_length; - repetition_type = other99.repetition_type; - name = other99.name; - num_children = other99.num_children; - converted_type = other99.converted_type; - scale = other99.scale; - precision = other99.precision; - field_id = other99.field_id; - logicalType = other99.logicalType; - __isset = other99.__isset; -} -SchemaElement::SchemaElement(SchemaElement&& other100) noexcept { - type = other100.type; - type_length = other100.type_length; - repetition_type = other100.repetition_type; - name = 
std::move(other100.name); - num_children = other100.num_children; - converted_type = other100.converted_type; - scale = other100.scale; - precision = other100.precision; - field_id = other100.field_id; - logicalType = std::move(other100.logicalType); - __isset = other100.__isset; -} -SchemaElement& SchemaElement::operator=(const SchemaElement& other101) { - type = other101.type; - type_length = other101.type_length; - repetition_type = other101.repetition_type; - name = other101.name; - num_children = other101.num_children; - converted_type = other101.converted_type; - scale = other101.scale; - precision = other101.precision; - field_id = other101.field_id; - logicalType = other101.logicalType; - __isset = other101.__isset; +bool SchemaElement::operator==(const SchemaElement & rhs) const +{ + if (__isset.type != rhs.__isset.type) + return false; + else if (__isset.type && !(type == rhs.type)) + return false; + if (__isset.type_length != rhs.__isset.type_length) + return false; + else if (__isset.type_length && !(type_length == rhs.type_length)) + return false; + if (__isset.repetition_type != rhs.__isset.repetition_type) + return false; + else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type)) + return false; + if (!(name == rhs.name)) + return false; + if (__isset.num_children != rhs.__isset.num_children) + return false; + else if (__isset.num_children && !(num_children == rhs.num_children)) + return false; + if (__isset.converted_type != rhs.__isset.converted_type) + return false; + else if (__isset.converted_type && !(converted_type == rhs.converted_type)) + return false; + if (__isset.scale != rhs.__isset.scale) + return false; + else if (__isset.scale && !(scale == rhs.scale)) + return false; + if (__isset.precision != rhs.__isset.precision) + return false; + else if (__isset.precision && !(precision == rhs.precision)) + return false; + if (__isset.field_id != rhs.__isset.field_id) + return false; + else if (__isset.field_id && 
!(field_id == rhs.field_id)) + return false; + if (__isset.logicalType != rhs.__isset.logicalType) + return false; + else if (__isset.logicalType && !(logicalType == rhs.logicalType)) + return false; + return true; +} + +SchemaElement::SchemaElement(const SchemaElement& other126) { + type = other126.type; + type_length = other126.type_length; + repetition_type = other126.repetition_type; + name = other126.name; + num_children = other126.num_children; + converted_type = other126.converted_type; + scale = other126.scale; + precision = other126.precision; + field_id = other126.field_id; + logicalType = other126.logicalType; + __isset = other126.__isset; +} +SchemaElement::SchemaElement(SchemaElement&& other127) noexcept { + type = other127.type; + type_length = other127.type_length; + repetition_type = other127.repetition_type; + name = std::move(other127.name); + num_children = other127.num_children; + converted_type = other127.converted_type; + scale = other127.scale; + precision = other127.precision; + field_id = other127.field_id; + logicalType = std::move(other127.logicalType); + __isset = other127.__isset; +} +SchemaElement& SchemaElement::operator=(const SchemaElement& other128) { + type = other128.type; + type_length = other128.type_length; + repetition_type = other128.repetition_type; + name = other128.name; + num_children = other128.num_children; + converted_type = other128.converted_type; + scale = other128.scale; + precision = other128.precision; + field_id = other128.field_id; + logicalType = other128.logicalType; + __isset = other128.__isset; return *this; } -SchemaElement& SchemaElement::operator=(SchemaElement&& other102) noexcept { - type = other102.type; - type_length = other102.type_length; - repetition_type = other102.repetition_type; - name = std::move(other102.name); - num_children = other102.num_children; - converted_type = other102.converted_type; - scale = other102.scale; - precision = other102.precision; - field_id = other102.field_id; - 
logicalType = std::move(other102.logicalType); - __isset = other102.__isset; +SchemaElement& SchemaElement::operator=(SchemaElement&& other129) noexcept { + type = other129.type; + type_length = other129.type_length; + repetition_type = other129.repetition_type; + name = std::move(other129.name); + num_children = other129.num_children; + converted_type = other129.converted_type; + scale = other129.scale; + precision = other129.precision; + field_id = other129.field_id; + logicalType = std::move(other129.logicalType); + __isset = other129.__isset; return *this; } void SchemaElement::printTo(std::ostream& out) const { @@ -1900,6 +2740,12 @@ void SchemaElement::printTo(std::ostream& out) const { DataPageHeader::~DataPageHeader() noexcept { } +DataPageHeader::DataPageHeader() noexcept + : num_values(0), + encoding(static_cast(0)), + definition_level_encoding(static_cast(0)), + repetition_level_encoding(static_cast(0)) { +} void DataPageHeader::__set_num_values(const int32_t val) { this->num_values = val; @@ -1938,38 +2784,55 @@ void swap(DataPageHeader &a, DataPageHeader &b) { swap(a.__isset, b.__isset); } -DataPageHeader::DataPageHeader(const DataPageHeader& other106) { - num_values = other106.num_values; - encoding = other106.encoding; - definition_level_encoding = other106.definition_level_encoding; - repetition_level_encoding = other106.repetition_level_encoding; - statistics = other106.statistics; - __isset = other106.__isset; -} -DataPageHeader::DataPageHeader(DataPageHeader&& other107) noexcept { - num_values = other107.num_values; - encoding = other107.encoding; - definition_level_encoding = other107.definition_level_encoding; - repetition_level_encoding = other107.repetition_level_encoding; - statistics = std::move(other107.statistics); - __isset = other107.__isset; -} -DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other108) { - num_values = other108.num_values; - encoding = other108.encoding; - definition_level_encoding = 
other108.definition_level_encoding; - repetition_level_encoding = other108.repetition_level_encoding; - statistics = other108.statistics; - __isset = other108.__isset; +bool DataPageHeader::operator==(const DataPageHeader & rhs) const +{ + if (!(num_values == rhs.num_values)) + return false; + if (!(encoding == rhs.encoding)) + return false; + if (!(definition_level_encoding == rhs.definition_level_encoding)) + return false; + if (!(repetition_level_encoding == rhs.repetition_level_encoding)) + return false; + if (__isset.statistics != rhs.__isset.statistics) + return false; + else if (__isset.statistics && !(statistics == rhs.statistics)) + return false; + return true; +} + +DataPageHeader::DataPageHeader(const DataPageHeader& other133) { + num_values = other133.num_values; + encoding = other133.encoding; + definition_level_encoding = other133.definition_level_encoding; + repetition_level_encoding = other133.repetition_level_encoding; + statistics = other133.statistics; + __isset = other133.__isset; +} +DataPageHeader::DataPageHeader(DataPageHeader&& other134) noexcept { + num_values = other134.num_values; + encoding = other134.encoding; + definition_level_encoding = other134.definition_level_encoding; + repetition_level_encoding = other134.repetition_level_encoding; + statistics = std::move(other134.statistics); + __isset = other134.__isset; +} +DataPageHeader& DataPageHeader::operator=(const DataPageHeader& other135) { + num_values = other135.num_values; + encoding = other135.encoding; + definition_level_encoding = other135.definition_level_encoding; + repetition_level_encoding = other135.repetition_level_encoding; + statistics = other135.statistics; + __isset = other135.__isset; return *this; } -DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other109) noexcept { - num_values = other109.num_values; - encoding = other109.encoding; - definition_level_encoding = other109.definition_level_encoding; - repetition_level_encoding = 
other109.repetition_level_encoding; - statistics = std::move(other109.statistics); - __isset = other109.__isset; +DataPageHeader& DataPageHeader::operator=(DataPageHeader&& other136) noexcept { + num_values = other136.num_values; + encoding = other136.encoding; + definition_level_encoding = other136.definition_level_encoding; + repetition_level_encoding = other136.repetition_level_encoding; + statistics = std::move(other136.statistics); + __isset = other136.__isset; return *this; } void DataPageHeader::printTo(std::ostream& out) const { @@ -1987,6 +2850,8 @@ void DataPageHeader::printTo(std::ostream& out) const { IndexPageHeader::~IndexPageHeader() noexcept { } +IndexPageHeader::IndexPageHeader() noexcept { +} std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj) { obj.printTo(out); @@ -2000,18 +2865,23 @@ void swap(IndexPageHeader &a, IndexPageHeader &b) { (void) b; } -IndexPageHeader::IndexPageHeader(const IndexPageHeader& other110) noexcept { - (void) other110; +bool IndexPageHeader::operator==(const IndexPageHeader & /* rhs */) const +{ + return true; +} + +IndexPageHeader::IndexPageHeader(const IndexPageHeader& other137) noexcept { + (void) other137; } -IndexPageHeader::IndexPageHeader(IndexPageHeader&& other111) noexcept { - (void) other111; +IndexPageHeader::IndexPageHeader(IndexPageHeader&& other138) noexcept { + (void) other138; } -IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other112) noexcept { - (void) other112; +IndexPageHeader& IndexPageHeader::operator=(const IndexPageHeader& other139) noexcept { + (void) other139; return *this; } -IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other113) noexcept { - (void) other113; +IndexPageHeader& IndexPageHeader::operator=(IndexPageHeader&& other140) noexcept { + (void) other140; return *this; } void IndexPageHeader::printTo(std::ostream& out) const { @@ -2024,6 +2894,11 @@ void IndexPageHeader::printTo(std::ostream& out) const { 
DictionaryPageHeader::~DictionaryPageHeader() noexcept { } +DictionaryPageHeader::DictionaryPageHeader() noexcept + : num_values(0), + encoding(static_cast(0)), + is_sorted(0) { +} void DictionaryPageHeader::__set_num_values(const int32_t val) { this->num_values = val; @@ -2052,30 +2927,43 @@ void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { swap(a.__isset, b.__isset); } -DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other115) noexcept { - num_values = other115.num_values; - encoding = other115.encoding; - is_sorted = other115.is_sorted; - __isset = other115.__isset; -} -DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other116) noexcept { - num_values = other116.num_values; - encoding = other116.encoding; - is_sorted = other116.is_sorted; - __isset = other116.__isset; -} -DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other117) noexcept { - num_values = other117.num_values; - encoding = other117.encoding; - is_sorted = other117.is_sorted; - __isset = other117.__isset; +bool DictionaryPageHeader::operator==(const DictionaryPageHeader & rhs) const +{ + if (!(num_values == rhs.num_values)) + return false; + if (!(encoding == rhs.encoding)) + return false; + if (__isset.is_sorted != rhs.__isset.is_sorted) + return false; + else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted)) + return false; + return true; +} + +DictionaryPageHeader::DictionaryPageHeader(const DictionaryPageHeader& other142) noexcept { + num_values = other142.num_values; + encoding = other142.encoding; + is_sorted = other142.is_sorted; + __isset = other142.__isset; +} +DictionaryPageHeader::DictionaryPageHeader(DictionaryPageHeader&& other143) noexcept { + num_values = other143.num_values; + encoding = other143.encoding; + is_sorted = other143.is_sorted; + __isset = other143.__isset; +} +DictionaryPageHeader& DictionaryPageHeader::operator=(const DictionaryPageHeader& other144) noexcept { + num_values = 
other144.num_values; + encoding = other144.encoding; + is_sorted = other144.is_sorted; + __isset = other144.__isset; return *this; } -DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other118) noexcept { - num_values = other118.num_values; - encoding = other118.encoding; - is_sorted = other118.is_sorted; - __isset = other118.__isset; +DictionaryPageHeader& DictionaryPageHeader::operator=(DictionaryPageHeader&& other145) noexcept { + num_values = other145.num_values; + encoding = other145.encoding; + is_sorted = other145.is_sorted; + __isset = other145.__isset; return *this; } void DictionaryPageHeader::printTo(std::ostream& out) const { @@ -2091,6 +2979,15 @@ void DictionaryPageHeader::printTo(std::ostream& out) const { DataPageHeaderV2::~DataPageHeaderV2() noexcept { } +DataPageHeaderV2::DataPageHeaderV2() noexcept + : num_values(0), + num_nulls(0), + num_rows(0), + encoding(static_cast(0)), + definition_levels_byte_length(0), + repetition_levels_byte_length(0), + is_compressed(true) { +} void DataPageHeaderV2::__set_num_values(const int32_t val) { this->num_values = val; @@ -2145,50 +3042,75 @@ void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { swap(a.__isset, b.__isset); } -DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other120) { - num_values = other120.num_values; - num_nulls = other120.num_nulls; - num_rows = other120.num_rows; - encoding = other120.encoding; - definition_levels_byte_length = other120.definition_levels_byte_length; - repetition_levels_byte_length = other120.repetition_levels_byte_length; - is_compressed = other120.is_compressed; - statistics = other120.statistics; - __isset = other120.__isset; -} -DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other121) noexcept { - num_values = other121.num_values; - num_nulls = other121.num_nulls; - num_rows = other121.num_rows; - encoding = other121.encoding; - definition_levels_byte_length = other121.definition_levels_byte_length; - 
repetition_levels_byte_length = other121.repetition_levels_byte_length; - is_compressed = other121.is_compressed; - statistics = std::move(other121.statistics); - __isset = other121.__isset; +bool DataPageHeaderV2::operator==(const DataPageHeaderV2 & rhs) const +{ + if (!(num_values == rhs.num_values)) + return false; + if (!(num_nulls == rhs.num_nulls)) + return false; + if (!(num_rows == rhs.num_rows)) + return false; + if (!(encoding == rhs.encoding)) + return false; + if (!(definition_levels_byte_length == rhs.definition_levels_byte_length)) + return false; + if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length)) + return false; + if (__isset.is_compressed != rhs.__isset.is_compressed) + return false; + else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed)) + return false; + if (__isset.statistics != rhs.__isset.statistics) + return false; + else if (__isset.statistics && !(statistics == rhs.statistics)) + return false; + return true; +} + +DataPageHeaderV2::DataPageHeaderV2(const DataPageHeaderV2& other147) { + num_values = other147.num_values; + num_nulls = other147.num_nulls; + num_rows = other147.num_rows; + encoding = other147.encoding; + definition_levels_byte_length = other147.definition_levels_byte_length; + repetition_levels_byte_length = other147.repetition_levels_byte_length; + is_compressed = other147.is_compressed; + statistics = other147.statistics; + __isset = other147.__isset; } -DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other122) { - num_values = other122.num_values; - num_nulls = other122.num_nulls; - num_rows = other122.num_rows; - encoding = other122.encoding; - definition_levels_byte_length = other122.definition_levels_byte_length; - repetition_levels_byte_length = other122.repetition_levels_byte_length; - is_compressed = other122.is_compressed; - statistics = other122.statistics; - __isset = other122.__isset; +DataPageHeaderV2::DataPageHeaderV2(DataPageHeaderV2&& other148) 
noexcept { + num_values = other148.num_values; + num_nulls = other148.num_nulls; + num_rows = other148.num_rows; + encoding = other148.encoding; + definition_levels_byte_length = other148.definition_levels_byte_length; + repetition_levels_byte_length = other148.repetition_levels_byte_length; + is_compressed = other148.is_compressed; + statistics = std::move(other148.statistics); + __isset = other148.__isset; +} +DataPageHeaderV2& DataPageHeaderV2::operator=(const DataPageHeaderV2& other149) { + num_values = other149.num_values; + num_nulls = other149.num_nulls; + num_rows = other149.num_rows; + encoding = other149.encoding; + definition_levels_byte_length = other149.definition_levels_byte_length; + repetition_levels_byte_length = other149.repetition_levels_byte_length; + is_compressed = other149.is_compressed; + statistics = other149.statistics; + __isset = other149.__isset; return *this; } -DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other123) noexcept { - num_values = other123.num_values; - num_nulls = other123.num_nulls; - num_rows = other123.num_rows; - encoding = other123.encoding; - definition_levels_byte_length = other123.definition_levels_byte_length; - repetition_levels_byte_length = other123.repetition_levels_byte_length; - is_compressed = other123.is_compressed; - statistics = std::move(other123.statistics); - __isset = other123.__isset; +DataPageHeaderV2& DataPageHeaderV2::operator=(DataPageHeaderV2&& other150) noexcept { + num_values = other150.num_values; + num_nulls = other150.num_nulls; + num_rows = other150.num_rows; + encoding = other150.encoding; + definition_levels_byte_length = other150.definition_levels_byte_length; + repetition_levels_byte_length = other150.repetition_levels_byte_length; + is_compressed = other150.is_compressed; + statistics = std::move(other150.statistics); + __isset = other150.__isset; return *this; } void DataPageHeaderV2::printTo(std::ostream& out) const { @@ -2209,6 +3131,8 @@ void 
DataPageHeaderV2::printTo(std::ostream& out) const { SplitBlockAlgorithm::~SplitBlockAlgorithm() noexcept { } +SplitBlockAlgorithm::SplitBlockAlgorithm() noexcept { +} std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& obj) { obj.printTo(out); @@ -2222,18 +3146,23 @@ void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) { (void) b; } -SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other124) noexcept { - (void) other124; +bool SplitBlockAlgorithm::operator==(const SplitBlockAlgorithm & /* rhs */) const +{ + return true; +} + +SplitBlockAlgorithm::SplitBlockAlgorithm(const SplitBlockAlgorithm& other151) noexcept { + (void) other151; } -SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other125) noexcept { - (void) other125; +SplitBlockAlgorithm::SplitBlockAlgorithm(SplitBlockAlgorithm&& other152) noexcept { + (void) other152; } -SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other126) noexcept { - (void) other126; +SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(const SplitBlockAlgorithm& other153) noexcept { + (void) other153; return *this; } -SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other127) noexcept { - (void) other127; +SplitBlockAlgorithm& SplitBlockAlgorithm::operator=(SplitBlockAlgorithm&& other154) noexcept { + (void) other154; return *this; } void SplitBlockAlgorithm::printTo(std::ostream& out) const { @@ -2246,6 +3175,8 @@ void SplitBlockAlgorithm::printTo(std::ostream& out) const { BloomFilterAlgorithm::~BloomFilterAlgorithm() noexcept { } +BloomFilterAlgorithm::BloomFilterAlgorithm() noexcept { +} void BloomFilterAlgorithm::__set_BLOCK(const SplitBlockAlgorithm& val) { this->BLOCK = val; @@ -2264,22 +3195,31 @@ void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) { swap(a.__isset, b.__isset); } -BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other128) noexcept { - BLOCK = other128.BLOCK; - __isset = 
other128.__isset; +bool BloomFilterAlgorithm::operator==(const BloomFilterAlgorithm & rhs) const +{ + if (__isset.BLOCK != rhs.__isset.BLOCK) + return false; + else if (__isset.BLOCK && !(BLOCK == rhs.BLOCK)) + return false; + return true; } -BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other129) noexcept { - BLOCK = std::move(other129.BLOCK); - __isset = other129.__isset; + +BloomFilterAlgorithm::BloomFilterAlgorithm(const BloomFilterAlgorithm& other155) noexcept { + BLOCK = other155.BLOCK; + __isset = other155.__isset; +} +BloomFilterAlgorithm::BloomFilterAlgorithm(BloomFilterAlgorithm&& other156) noexcept { + BLOCK = std::move(other156.BLOCK); + __isset = other156.__isset; } -BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other130) noexcept { - BLOCK = other130.BLOCK; - __isset = other130.__isset; +BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(const BloomFilterAlgorithm& other157) noexcept { + BLOCK = other157.BLOCK; + __isset = other157.__isset; return *this; } -BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other131) noexcept { - BLOCK = std::move(other131.BLOCK); - __isset = other131.__isset; +BloomFilterAlgorithm& BloomFilterAlgorithm::operator=(BloomFilterAlgorithm&& other158) noexcept { + BLOCK = std::move(other158.BLOCK); + __isset = other158.__isset; return *this; } void BloomFilterAlgorithm::printTo(std::ostream& out) const { @@ -2293,6 +3233,8 @@ void BloomFilterAlgorithm::printTo(std::ostream& out) const { XxHash::~XxHash() noexcept { } +XxHash::XxHash() noexcept { +} std::ostream& operator<<(std::ostream& out, const XxHash& obj) { obj.printTo(out); @@ -2306,18 +3248,23 @@ void swap(XxHash &a, XxHash &b) { (void) b; } -XxHash::XxHash(const XxHash& other132) noexcept { - (void) other132; +bool XxHash::operator==(const XxHash & /* rhs */) const +{ + return true; +} + +XxHash::XxHash(const XxHash& other159) noexcept { + (void) other159; } 
-XxHash::XxHash(XxHash&& other133) noexcept { - (void) other133; +XxHash::XxHash(XxHash&& other160) noexcept { + (void) other160; } -XxHash& XxHash::operator=(const XxHash& other134) noexcept { - (void) other134; +XxHash& XxHash::operator=(const XxHash& other161) noexcept { + (void) other161; return *this; } -XxHash& XxHash::operator=(XxHash&& other135) noexcept { - (void) other135; +XxHash& XxHash::operator=(XxHash&& other162) noexcept { + (void) other162; return *this; } void XxHash::printTo(std::ostream& out) const { @@ -2330,6 +3277,8 @@ void XxHash::printTo(std::ostream& out) const { BloomFilterHash::~BloomFilterHash() noexcept { } +BloomFilterHash::BloomFilterHash() noexcept { +} void BloomFilterHash::__set_XXHASH(const XxHash& val) { this->XXHASH = val; @@ -2348,22 +3297,31 @@ void swap(BloomFilterHash &a, BloomFilterHash &b) { swap(a.__isset, b.__isset); } -BloomFilterHash::BloomFilterHash(const BloomFilterHash& other136) noexcept { - XXHASH = other136.XXHASH; - __isset = other136.__isset; +bool BloomFilterHash::operator==(const BloomFilterHash & rhs) const +{ + if (__isset.XXHASH != rhs.__isset.XXHASH) + return false; + else if (__isset.XXHASH && !(XXHASH == rhs.XXHASH)) + return false; + return true; +} + +BloomFilterHash::BloomFilterHash(const BloomFilterHash& other163) noexcept { + XXHASH = other163.XXHASH; + __isset = other163.__isset; } -BloomFilterHash::BloomFilterHash(BloomFilterHash&& other137) noexcept { - XXHASH = std::move(other137.XXHASH); - __isset = other137.__isset; +BloomFilterHash::BloomFilterHash(BloomFilterHash&& other164) noexcept { + XXHASH = std::move(other164.XXHASH); + __isset = other164.__isset; } -BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other138) noexcept { - XXHASH = other138.XXHASH; - __isset = other138.__isset; +BloomFilterHash& BloomFilterHash::operator=(const BloomFilterHash& other165) noexcept { + XXHASH = other165.XXHASH; + __isset = other165.__isset; return *this; } -BloomFilterHash& 
BloomFilterHash::operator=(BloomFilterHash&& other139) noexcept { - XXHASH = std::move(other139.XXHASH); - __isset = other139.__isset; +BloomFilterHash& BloomFilterHash::operator=(BloomFilterHash&& other166) noexcept { + XXHASH = std::move(other166.XXHASH); + __isset = other166.__isset; return *this; } void BloomFilterHash::printTo(std::ostream& out) const { @@ -2377,6 +3335,8 @@ void BloomFilterHash::printTo(std::ostream& out) const { Uncompressed::~Uncompressed() noexcept { } +Uncompressed::Uncompressed() noexcept { +} std::ostream& operator<<(std::ostream& out, const Uncompressed& obj) { obj.printTo(out); @@ -2390,18 +3350,23 @@ void swap(Uncompressed &a, Uncompressed &b) { (void) b; } -Uncompressed::Uncompressed(const Uncompressed& other140) noexcept { - (void) other140; +bool Uncompressed::operator==(const Uncompressed & /* rhs */) const +{ + return true; } -Uncompressed::Uncompressed(Uncompressed&& other141) noexcept { - (void) other141; + +Uncompressed::Uncompressed(const Uncompressed& other167) noexcept { + (void) other167; } -Uncompressed& Uncompressed::operator=(const Uncompressed& other142) noexcept { - (void) other142; +Uncompressed::Uncompressed(Uncompressed&& other168) noexcept { + (void) other168; +} +Uncompressed& Uncompressed::operator=(const Uncompressed& other169) noexcept { + (void) other169; return *this; } -Uncompressed& Uncompressed::operator=(Uncompressed&& other143) noexcept { - (void) other143; +Uncompressed& Uncompressed::operator=(Uncompressed&& other170) noexcept { + (void) other170; return *this; } void Uncompressed::printTo(std::ostream& out) const { @@ -2414,6 +3379,8 @@ void Uncompressed::printTo(std::ostream& out) const { BloomFilterCompression::~BloomFilterCompression() noexcept { } +BloomFilterCompression::BloomFilterCompression() noexcept { +} void BloomFilterCompression::__set_UNCOMPRESSED(const Uncompressed& val) { this->UNCOMPRESSED = val; @@ -2432,22 +3399,31 @@ void swap(BloomFilterCompression &a, BloomFilterCompression &b) 
{ swap(a.__isset, b.__isset); } -BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other144) noexcept { - UNCOMPRESSED = other144.UNCOMPRESSED; - __isset = other144.__isset; +bool BloomFilterCompression::operator==(const BloomFilterCompression & rhs) const +{ + if (__isset.UNCOMPRESSED != rhs.__isset.UNCOMPRESSED) + return false; + else if (__isset.UNCOMPRESSED && !(UNCOMPRESSED == rhs.UNCOMPRESSED)) + return false; + return true; } -BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other145) noexcept { - UNCOMPRESSED = std::move(other145.UNCOMPRESSED); - __isset = other145.__isset; + +BloomFilterCompression::BloomFilterCompression(const BloomFilterCompression& other171) noexcept { + UNCOMPRESSED = other171.UNCOMPRESSED; + __isset = other171.__isset; } -BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other146) noexcept { - UNCOMPRESSED = other146.UNCOMPRESSED; - __isset = other146.__isset; +BloomFilterCompression::BloomFilterCompression(BloomFilterCompression&& other172) noexcept { + UNCOMPRESSED = std::move(other172.UNCOMPRESSED); + __isset = other172.__isset; +} +BloomFilterCompression& BloomFilterCompression::operator=(const BloomFilterCompression& other173) noexcept { + UNCOMPRESSED = other173.UNCOMPRESSED; + __isset = other173.__isset; return *this; } -BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other147) noexcept { - UNCOMPRESSED = std::move(other147.UNCOMPRESSED); - __isset = other147.__isset; +BloomFilterCompression& BloomFilterCompression::operator=(BloomFilterCompression&& other174) noexcept { + UNCOMPRESSED = std::move(other174.UNCOMPRESSED); + __isset = other174.__isset; return *this; } void BloomFilterCompression::printTo(std::ostream& out) const { @@ -2461,6 +3437,9 @@ void BloomFilterCompression::printTo(std::ostream& out) const { BloomFilterHeader::~BloomFilterHeader() noexcept { } +BloomFilterHeader::BloomFilterHeader() 
noexcept + : numBytes(0) { +} void BloomFilterHeader::__set_numBytes(const int32_t val) { this->numBytes = val; @@ -2492,30 +3471,43 @@ void swap(BloomFilterHeader &a, BloomFilterHeader &b) { swap(a.compression, b.compression); } -BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other148) noexcept { - numBytes = other148.numBytes; - algorithm = other148.algorithm; - hash = other148.hash; - compression = other148.compression; -} -BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other149) noexcept { - numBytes = other149.numBytes; - algorithm = std::move(other149.algorithm); - hash = std::move(other149.hash); - compression = std::move(other149.compression); -} -BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other150) noexcept { - numBytes = other150.numBytes; - algorithm = other150.algorithm; - hash = other150.hash; - compression = other150.compression; +bool BloomFilterHeader::operator==(const BloomFilterHeader & rhs) const +{ + if (!(numBytes == rhs.numBytes)) + return false; + if (!(algorithm == rhs.algorithm)) + return false; + if (!(hash == rhs.hash)) + return false; + if (!(compression == rhs.compression)) + return false; + return true; +} + +BloomFilterHeader::BloomFilterHeader(const BloomFilterHeader& other175) noexcept { + numBytes = other175.numBytes; + algorithm = other175.algorithm; + hash = other175.hash; + compression = other175.compression; +} +BloomFilterHeader::BloomFilterHeader(BloomFilterHeader&& other176) noexcept { + numBytes = other176.numBytes; + algorithm = std::move(other176.algorithm); + hash = std::move(other176.hash); + compression = std::move(other176.compression); +} +BloomFilterHeader& BloomFilterHeader::operator=(const BloomFilterHeader& other177) noexcept { + numBytes = other177.numBytes; + algorithm = other177.algorithm; + hash = other177.hash; + compression = other177.compression; return *this; } -BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other151) noexcept { - 
numBytes = other151.numBytes; - algorithm = std::move(other151.algorithm); - hash = std::move(other151.hash); - compression = std::move(other151.compression); +BloomFilterHeader& BloomFilterHeader::operator=(BloomFilterHeader&& other178) noexcept { + numBytes = other178.numBytes; + algorithm = std::move(other178.algorithm); + hash = std::move(other178.hash); + compression = std::move(other178.compression); return *this; } void BloomFilterHeader::printTo(std::ostream& out) const { @@ -2532,6 +3524,12 @@ void BloomFilterHeader::printTo(std::ostream& out) const { PageHeader::~PageHeader() noexcept { } +PageHeader::PageHeader() noexcept + : type(static_cast(0)), + uncompressed_page_size(0), + compressed_page_size(0), + crc(0) { +} void PageHeader::__set_type(const PageType::type val) { this->type = val; @@ -2589,50 +3587,81 @@ void swap(PageHeader &a, PageHeader &b) { swap(a.__isset, b.__isset); } -PageHeader::PageHeader(const PageHeader& other153) { - type = other153.type; - uncompressed_page_size = other153.uncompressed_page_size; - compressed_page_size = other153.compressed_page_size; - crc = other153.crc; - data_page_header = other153.data_page_header; - index_page_header = other153.index_page_header; - dictionary_page_header = other153.dictionary_page_header; - data_page_header_v2 = other153.data_page_header_v2; - __isset = other153.__isset; -} -PageHeader::PageHeader(PageHeader&& other154) noexcept { - type = other154.type; - uncompressed_page_size = other154.uncompressed_page_size; - compressed_page_size = other154.compressed_page_size; - crc = other154.crc; - data_page_header = std::move(other154.data_page_header); - index_page_header = std::move(other154.index_page_header); - dictionary_page_header = std::move(other154.dictionary_page_header); - data_page_header_v2 = std::move(other154.data_page_header_v2); - __isset = other154.__isset; -} -PageHeader& PageHeader::operator=(const PageHeader& other155) { - type = other155.type; - uncompressed_page_size = 
other155.uncompressed_page_size; - compressed_page_size = other155.compressed_page_size; - crc = other155.crc; - data_page_header = other155.data_page_header; - index_page_header = other155.index_page_header; - dictionary_page_header = other155.dictionary_page_header; - data_page_header_v2 = other155.data_page_header_v2; - __isset = other155.__isset; +bool PageHeader::operator==(const PageHeader & rhs) const +{ + if (!(type == rhs.type)) + return false; + if (!(uncompressed_page_size == rhs.uncompressed_page_size)) + return false; + if (!(compressed_page_size == rhs.compressed_page_size)) + return false; + if (__isset.crc != rhs.__isset.crc) + return false; + else if (__isset.crc && !(crc == rhs.crc)) + return false; + if (__isset.data_page_header != rhs.__isset.data_page_header) + return false; + else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header)) + return false; + if (__isset.index_page_header != rhs.__isset.index_page_header) + return false; + else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header)) + return false; + if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header) + return false; + else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header)) + return false; + if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2) + return false; + else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2)) + return false; + return true; +} + +PageHeader::PageHeader(const PageHeader& other180) { + type = other180.type; + uncompressed_page_size = other180.uncompressed_page_size; + compressed_page_size = other180.compressed_page_size; + crc = other180.crc; + data_page_header = other180.data_page_header; + index_page_header = other180.index_page_header; + dictionary_page_header = other180.dictionary_page_header; + data_page_header_v2 = other180.data_page_header_v2; + __isset = other180.__isset; +} 
+PageHeader::PageHeader(PageHeader&& other181) noexcept { + type = other181.type; + uncompressed_page_size = other181.uncompressed_page_size; + compressed_page_size = other181.compressed_page_size; + crc = other181.crc; + data_page_header = std::move(other181.data_page_header); + index_page_header = std::move(other181.index_page_header); + dictionary_page_header = std::move(other181.dictionary_page_header); + data_page_header_v2 = std::move(other181.data_page_header_v2); + __isset = other181.__isset; +} +PageHeader& PageHeader::operator=(const PageHeader& other182) { + type = other182.type; + uncompressed_page_size = other182.uncompressed_page_size; + compressed_page_size = other182.compressed_page_size; + crc = other182.crc; + data_page_header = other182.data_page_header; + index_page_header = other182.index_page_header; + dictionary_page_header = other182.dictionary_page_header; + data_page_header_v2 = other182.data_page_header_v2; + __isset = other182.__isset; return *this; } -PageHeader& PageHeader::operator=(PageHeader&& other156) noexcept { - type = other156.type; - uncompressed_page_size = other156.uncompressed_page_size; - compressed_page_size = other156.compressed_page_size; - crc = other156.crc; - data_page_header = std::move(other156.data_page_header); - index_page_header = std::move(other156.index_page_header); - dictionary_page_header = std::move(other156.dictionary_page_header); - data_page_header_v2 = std::move(other156.data_page_header_v2); - __isset = other156.__isset; +PageHeader& PageHeader::operator=(PageHeader&& other183) noexcept { + type = other183.type; + uncompressed_page_size = other183.uncompressed_page_size; + compressed_page_size = other183.compressed_page_size; + crc = other183.crc; + data_page_header = std::move(other183.data_page_header); + index_page_header = std::move(other183.index_page_header); + dictionary_page_header = std::move(other183.dictionary_page_header); + data_page_header_v2 = std::move(other183.data_page_header_v2); + 
__isset = other183.__isset; return *this; } void PageHeader::printTo(std::ostream& out) const { @@ -2653,6 +3682,10 @@ void PageHeader::printTo(std::ostream& out) const { KeyValue::~KeyValue() noexcept { } +KeyValue::KeyValue() noexcept + : key(), + value() { +} void KeyValue::__set_key(const std::string& val) { this->key = val; @@ -2676,26 +3709,37 @@ void swap(KeyValue &a, KeyValue &b) { swap(a.__isset, b.__isset); } -KeyValue::KeyValue(const KeyValue& other157) { - key = other157.key; - value = other157.value; - __isset = other157.__isset; -} -KeyValue::KeyValue(KeyValue&& other158) noexcept { - key = std::move(other158.key); - value = std::move(other158.value); - __isset = other158.__isset; -} -KeyValue& KeyValue::operator=(const KeyValue& other159) { - key = other159.key; - value = other159.value; - __isset = other159.__isset; +bool KeyValue::operator==(const KeyValue & rhs) const +{ + if (!(key == rhs.key)) + return false; + if (__isset.value != rhs.__isset.value) + return false; + else if (__isset.value && !(value == rhs.value)) + return false; + return true; +} + +KeyValue::KeyValue(const KeyValue& other184) { + key = other184.key; + value = other184.value; + __isset = other184.__isset; +} +KeyValue::KeyValue(KeyValue&& other185) noexcept { + key = std::move(other185.key); + value = std::move(other185.value); + __isset = other185.__isset; +} +KeyValue& KeyValue::operator=(const KeyValue& other186) { + key = other186.key; + value = other186.value; + __isset = other186.__isset; return *this; } -KeyValue& KeyValue::operator=(KeyValue&& other160) noexcept { - key = std::move(other160.key); - value = std::move(other160.value); - __isset = other160.__isset; +KeyValue& KeyValue::operator=(KeyValue&& other187) noexcept { + key = std::move(other187.key); + value = std::move(other187.value); + __isset = other187.__isset; return *this; } void KeyValue::printTo(std::ostream& out) const { @@ -2710,6 +3754,11 @@ void KeyValue::printTo(std::ostream& out) const { 
SortingColumn::~SortingColumn() noexcept { } +SortingColumn::SortingColumn() noexcept + : column_idx(0), + descending(0), + nulls_first(0) { +} void SortingColumn::__set_column_idx(const int32_t val) { this->column_idx = val; @@ -2736,26 +3785,37 @@ void swap(SortingColumn &a, SortingColumn &b) { swap(a.nulls_first, b.nulls_first); } -SortingColumn::SortingColumn(const SortingColumn& other161) noexcept { - column_idx = other161.column_idx; - descending = other161.descending; - nulls_first = other161.nulls_first; -} -SortingColumn::SortingColumn(SortingColumn&& other162) noexcept { - column_idx = other162.column_idx; - descending = other162.descending; - nulls_first = other162.nulls_first; -} -SortingColumn& SortingColumn::operator=(const SortingColumn& other163) noexcept { - column_idx = other163.column_idx; - descending = other163.descending; - nulls_first = other163.nulls_first; +bool SortingColumn::operator==(const SortingColumn & rhs) const +{ + if (!(column_idx == rhs.column_idx)) + return false; + if (!(descending == rhs.descending)) + return false; + if (!(nulls_first == rhs.nulls_first)) + return false; + return true; +} + +SortingColumn::SortingColumn(const SortingColumn& other188) noexcept { + column_idx = other188.column_idx; + descending = other188.descending; + nulls_first = other188.nulls_first; +} +SortingColumn::SortingColumn(SortingColumn&& other189) noexcept { + column_idx = other189.column_idx; + descending = other189.descending; + nulls_first = other189.nulls_first; +} +SortingColumn& SortingColumn::operator=(const SortingColumn& other190) noexcept { + column_idx = other190.column_idx; + descending = other190.descending; + nulls_first = other190.nulls_first; return *this; } -SortingColumn& SortingColumn::operator=(SortingColumn&& other164) noexcept { - column_idx = other164.column_idx; - descending = other164.descending; - nulls_first = other164.nulls_first; +SortingColumn& SortingColumn::operator=(SortingColumn&& other191) noexcept { + 
column_idx = other191.column_idx; + descending = other191.descending; + nulls_first = other191.nulls_first; return *this; } void SortingColumn::printTo(std::ostream& out) const { @@ -2771,6 +3831,11 @@ void SortingColumn::printTo(std::ostream& out) const { PageEncodingStats::~PageEncodingStats() noexcept { } +PageEncodingStats::PageEncodingStats() noexcept + : page_type(static_cast(0)), + encoding(static_cast(0)), + count(0) { +} void PageEncodingStats::__set_page_type(const PageType::type val) { this->page_type = val; @@ -2797,26 +3862,37 @@ void swap(PageEncodingStats &a, PageEncodingStats &b) { swap(a.count, b.count); } -PageEncodingStats::PageEncodingStats(const PageEncodingStats& other167) noexcept { - page_type = other167.page_type; - encoding = other167.encoding; - count = other167.count; -} -PageEncodingStats::PageEncodingStats(PageEncodingStats&& other168) noexcept { - page_type = other168.page_type; - encoding = other168.encoding; - count = other168.count; -} -PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other169) noexcept { - page_type = other169.page_type; - encoding = other169.encoding; - count = other169.count; +bool PageEncodingStats::operator==(const PageEncodingStats & rhs) const +{ + if (!(page_type == rhs.page_type)) + return false; + if (!(encoding == rhs.encoding)) + return false; + if (!(count == rhs.count)) + return false; + return true; +} + +PageEncodingStats::PageEncodingStats(const PageEncodingStats& other194) noexcept { + page_type = other194.page_type; + encoding = other194.encoding; + count = other194.count; +} +PageEncodingStats::PageEncodingStats(PageEncodingStats&& other195) noexcept { + page_type = other195.page_type; + encoding = other195.encoding; + count = other195.count; +} +PageEncodingStats& PageEncodingStats::operator=(const PageEncodingStats& other196) noexcept { + page_type = other196.page_type; + encoding = other196.encoding; + count = other196.count; return *this; } -PageEncodingStats& 
PageEncodingStats::operator=(PageEncodingStats&& other170) noexcept { - page_type = other170.page_type; - encoding = other170.encoding; - count = other170.count; +PageEncodingStats& PageEncodingStats::operator=(PageEncodingStats&& other197) noexcept { + page_type = other197.page_type; + encoding = other197.encoding; + count = other197.count; return *this; } void PageEncodingStats::printTo(std::ostream& out) const { @@ -2832,6 +3908,18 @@ void PageEncodingStats::printTo(std::ostream& out) const { ColumnMetaData::~ColumnMetaData() noexcept { } +ColumnMetaData::ColumnMetaData() noexcept + : type(static_cast(0)), + codec(static_cast(0)), + num_values(0), + total_uncompressed_size(0), + total_compressed_size(0), + data_page_offset(0), + index_page_offset(0), + dictionary_page_offset(0), + bloom_filter_offset(0), + bloom_filter_length(0) { +} void ColumnMetaData::__set_type(const Type::type val) { this->type = val; @@ -2904,6 +3992,11 @@ void ColumnMetaData::__set_size_statistics(const SizeStatistics& val) { this->size_statistics = val; __isset.size_statistics = true; } + +void ColumnMetaData::__set_geospatial_statistics(const GeospatialStatistics& val) { + this->geospatial_statistics = val; +__isset.geospatial_statistics = true; +} std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj) { obj.printTo(out); @@ -2929,85 +4022,147 @@ void swap(ColumnMetaData &a, ColumnMetaData &b) { swap(a.bloom_filter_offset, b.bloom_filter_offset); swap(a.bloom_filter_length, b.bloom_filter_length); swap(a.size_statistics, b.size_statistics); + swap(a.geospatial_statistics, b.geospatial_statistics); swap(a.__isset, b.__isset); } -ColumnMetaData::ColumnMetaData(const ColumnMetaData& other198) { - type = other198.type; - encodings = other198.encodings; - path_in_schema = other198.path_in_schema; - codec = other198.codec; - num_values = other198.num_values; - total_uncompressed_size = other198.total_uncompressed_size; - total_compressed_size = other198.total_compressed_size; 
- key_value_metadata = other198.key_value_metadata; - data_page_offset = other198.data_page_offset; - index_page_offset = other198.index_page_offset; - dictionary_page_offset = other198.dictionary_page_offset; - statistics = other198.statistics; - encoding_stats = other198.encoding_stats; - bloom_filter_offset = other198.bloom_filter_offset; - bloom_filter_length = other198.bloom_filter_length; - size_statistics = other198.size_statistics; - __isset = other198.__isset; -} -ColumnMetaData::ColumnMetaData(ColumnMetaData&& other199) noexcept { - type = other199.type; - encodings = std::move(other199.encodings); - path_in_schema = std::move(other199.path_in_schema); - codec = other199.codec; - num_values = other199.num_values; - total_uncompressed_size = other199.total_uncompressed_size; - total_compressed_size = other199.total_compressed_size; - key_value_metadata = std::move(other199.key_value_metadata); - data_page_offset = other199.data_page_offset; - index_page_offset = other199.index_page_offset; - dictionary_page_offset = other199.dictionary_page_offset; - statistics = std::move(other199.statistics); - encoding_stats = std::move(other199.encoding_stats); - bloom_filter_offset = other199.bloom_filter_offset; - bloom_filter_length = other199.bloom_filter_length; - size_statistics = std::move(other199.size_statistics); - __isset = other199.__isset; -} -ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other200) { - type = other200.type; - encodings = other200.encodings; - path_in_schema = other200.path_in_schema; - codec = other200.codec; - num_values = other200.num_values; - total_uncompressed_size = other200.total_uncompressed_size; - total_compressed_size = other200.total_compressed_size; - key_value_metadata = other200.key_value_metadata; - data_page_offset = other200.data_page_offset; - index_page_offset = other200.index_page_offset; - dictionary_page_offset = other200.dictionary_page_offset; - statistics = other200.statistics; - encoding_stats = 
other200.encoding_stats; - bloom_filter_offset = other200.bloom_filter_offset; - bloom_filter_length = other200.bloom_filter_length; - size_statistics = other200.size_statistics; - __isset = other200.__isset; +bool ColumnMetaData::operator==(const ColumnMetaData & rhs) const +{ + if (!(type == rhs.type)) + return false; + if (!(encodings == rhs.encodings)) + return false; + if (!(path_in_schema == rhs.path_in_schema)) + return false; + if (!(codec == rhs.codec)) + return false; + if (!(num_values == rhs.num_values)) + return false; + if (!(total_uncompressed_size == rhs.total_uncompressed_size)) + return false; + if (!(total_compressed_size == rhs.total_compressed_size)) + return false; + if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) + return false; + else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) + return false; + if (!(data_page_offset == rhs.data_page_offset)) + return false; + if (__isset.index_page_offset != rhs.__isset.index_page_offset) + return false; + else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset)) + return false; + if (__isset.dictionary_page_offset != rhs.__isset.dictionary_page_offset) + return false; + else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset)) + return false; + if (__isset.statistics != rhs.__isset.statistics) + return false; + else if (__isset.statistics && !(statistics == rhs.statistics)) + return false; + if (__isset.encoding_stats != rhs.__isset.encoding_stats) + return false; + else if (__isset.encoding_stats && !(encoding_stats == rhs.encoding_stats)) + return false; + if (__isset.bloom_filter_offset != rhs.__isset.bloom_filter_offset) + return false; + else if (__isset.bloom_filter_offset && !(bloom_filter_offset == rhs.bloom_filter_offset)) + return false; + if (__isset.bloom_filter_length != rhs.__isset.bloom_filter_length) + return false; + else if (__isset.bloom_filter_length && 
!(bloom_filter_length == rhs.bloom_filter_length)) + return false; + if (__isset.size_statistics != rhs.__isset.size_statistics) + return false; + else if (__isset.size_statistics && !(size_statistics == rhs.size_statistics)) + return false; + if (__isset.geospatial_statistics != rhs.__isset.geospatial_statistics) + return false; + else if (__isset.geospatial_statistics && !(geospatial_statistics == rhs.geospatial_statistics)) + return false; + return true; +} + +ColumnMetaData::ColumnMetaData(const ColumnMetaData& other225) { + type = other225.type; + encodings = other225.encodings; + path_in_schema = other225.path_in_schema; + codec = other225.codec; + num_values = other225.num_values; + total_uncompressed_size = other225.total_uncompressed_size; + total_compressed_size = other225.total_compressed_size; + key_value_metadata = other225.key_value_metadata; + data_page_offset = other225.data_page_offset; + index_page_offset = other225.index_page_offset; + dictionary_page_offset = other225.dictionary_page_offset; + statistics = other225.statistics; + encoding_stats = other225.encoding_stats; + bloom_filter_offset = other225.bloom_filter_offset; + bloom_filter_length = other225.bloom_filter_length; + size_statistics = other225.size_statistics; + geospatial_statistics = other225.geospatial_statistics; + __isset = other225.__isset; +} +ColumnMetaData::ColumnMetaData(ColumnMetaData&& other226) noexcept { + type = other226.type; + encodings = std::move(other226.encodings); + path_in_schema = std::move(other226.path_in_schema); + codec = other226.codec; + num_values = other226.num_values; + total_uncompressed_size = other226.total_uncompressed_size; + total_compressed_size = other226.total_compressed_size; + key_value_metadata = std::move(other226.key_value_metadata); + data_page_offset = other226.data_page_offset; + index_page_offset = other226.index_page_offset; + dictionary_page_offset = other226.dictionary_page_offset; + statistics = std::move(other226.statistics); + 
encoding_stats = std::move(other226.encoding_stats); + bloom_filter_offset = other226.bloom_filter_offset; + bloom_filter_length = other226.bloom_filter_length; + size_statistics = std::move(other226.size_statistics); + geospatial_statistics = std::move(other226.geospatial_statistics); + __isset = other226.__isset; +} +ColumnMetaData& ColumnMetaData::operator=(const ColumnMetaData& other227) { + type = other227.type; + encodings = other227.encodings; + path_in_schema = other227.path_in_schema; + codec = other227.codec; + num_values = other227.num_values; + total_uncompressed_size = other227.total_uncompressed_size; + total_compressed_size = other227.total_compressed_size; + key_value_metadata = other227.key_value_metadata; + data_page_offset = other227.data_page_offset; + index_page_offset = other227.index_page_offset; + dictionary_page_offset = other227.dictionary_page_offset; + statistics = other227.statistics; + encoding_stats = other227.encoding_stats; + bloom_filter_offset = other227.bloom_filter_offset; + bloom_filter_length = other227.bloom_filter_length; + size_statistics = other227.size_statistics; + geospatial_statistics = other227.geospatial_statistics; + __isset = other227.__isset; return *this; } -ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other201) noexcept { - type = other201.type; - encodings = std::move(other201.encodings); - path_in_schema = std::move(other201.path_in_schema); - codec = other201.codec; - num_values = other201.num_values; - total_uncompressed_size = other201.total_uncompressed_size; - total_compressed_size = other201.total_compressed_size; - key_value_metadata = std::move(other201.key_value_metadata); - data_page_offset = other201.data_page_offset; - index_page_offset = other201.index_page_offset; - dictionary_page_offset = other201.dictionary_page_offset; - statistics = std::move(other201.statistics); - encoding_stats = std::move(other201.encoding_stats); - bloom_filter_offset = other201.bloom_filter_offset; - 
bloom_filter_length = other201.bloom_filter_length; - size_statistics = std::move(other201.size_statistics); - __isset = other201.__isset; +ColumnMetaData& ColumnMetaData::operator=(ColumnMetaData&& other228) noexcept { + type = other228.type; + encodings = std::move(other228.encodings); + path_in_schema = std::move(other228.path_in_schema); + codec = other228.codec; + num_values = other228.num_values; + total_uncompressed_size = other228.total_uncompressed_size; + total_compressed_size = other228.total_compressed_size; + key_value_metadata = std::move(other228.key_value_metadata); + data_page_offset = other228.data_page_offset; + index_page_offset = other228.index_page_offset; + dictionary_page_offset = other228.dictionary_page_offset; + statistics = std::move(other228.statistics); + encoding_stats = std::move(other228.encoding_stats); + bloom_filter_offset = other228.bloom_filter_offset; + bloom_filter_length = other228.bloom_filter_length; + size_statistics = std::move(other228.size_statistics); + geospatial_statistics = std::move(other228.geospatial_statistics); + __isset = other228.__isset; return *this; } void ColumnMetaData::printTo(std::ostream& out) const { @@ -3029,6 +4184,7 @@ void ColumnMetaData::printTo(std::ostream& out) const { out << ", " << "bloom_filter_offset="; (__isset.bloom_filter_offset ? (out << to_string(bloom_filter_offset)) : (out << "")); out << ", " << "bloom_filter_length="; (__isset.bloom_filter_length ? (out << to_string(bloom_filter_length)) : (out << "")); out << ", " << "size_statistics="; (__isset.size_statistics ? (out << to_string(size_statistics)) : (out << "")); + out << ", " << "geospatial_statistics="; (__isset.geospatial_statistics ? 
(out << to_string(geospatial_statistics)) : (out << "")); out << ")"; } @@ -3036,6 +4192,8 @@ void ColumnMetaData::printTo(std::ostream& out) const { EncryptionWithFooterKey::~EncryptionWithFooterKey() noexcept { } +EncryptionWithFooterKey::EncryptionWithFooterKey() noexcept { +} std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj) { obj.printTo(out); @@ -3049,18 +4207,23 @@ void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { (void) b; } -EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other202) noexcept { - (void) other202; +bool EncryptionWithFooterKey::operator==(const EncryptionWithFooterKey & /* rhs */) const +{ + return true; +} + +EncryptionWithFooterKey::EncryptionWithFooterKey(const EncryptionWithFooterKey& other229) noexcept { + (void) other229; } -EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other203) noexcept { - (void) other203; +EncryptionWithFooterKey::EncryptionWithFooterKey(EncryptionWithFooterKey&& other230) noexcept { + (void) other230; } -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other204) noexcept { - (void) other204; +EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(const EncryptionWithFooterKey& other231) noexcept { + (void) other231; return *this; } -EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other205) noexcept { - (void) other205; +EncryptionWithFooterKey& EncryptionWithFooterKey::operator=(EncryptionWithFooterKey&& other232) noexcept { + (void) other232; return *this; } void EncryptionWithFooterKey::printTo(std::ostream& out) const { @@ -3073,6 +4236,9 @@ void EncryptionWithFooterKey::printTo(std::ostream& out) const { EncryptionWithColumnKey::~EncryptionWithColumnKey() noexcept { } +EncryptionWithColumnKey::EncryptionWithColumnKey() noexcept + : key_metadata() { +} void EncryptionWithColumnKey::__set_path_in_schema(const std::vector & 
val) { this->path_in_schema = val; @@ -3096,26 +4262,37 @@ void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { swap(a.__isset, b.__isset); } -EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other212) { - path_in_schema = other212.path_in_schema; - key_metadata = other212.key_metadata; - __isset = other212.__isset; +bool EncryptionWithColumnKey::operator==(const EncryptionWithColumnKey & rhs) const +{ + if (!(path_in_schema == rhs.path_in_schema)) + return false; + if (__isset.key_metadata != rhs.__isset.key_metadata) + return false; + else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) + return false; + return true; +} + +EncryptionWithColumnKey::EncryptionWithColumnKey(const EncryptionWithColumnKey& other239) { + path_in_schema = other239.path_in_schema; + key_metadata = other239.key_metadata; + __isset = other239.__isset; } -EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other213) noexcept { - path_in_schema = std::move(other213.path_in_schema); - key_metadata = std::move(other213.key_metadata); - __isset = other213.__isset; +EncryptionWithColumnKey::EncryptionWithColumnKey(EncryptionWithColumnKey&& other240) noexcept { + path_in_schema = std::move(other240.path_in_schema); + key_metadata = std::move(other240.key_metadata); + __isset = other240.__isset; } -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other214) { - path_in_schema = other214.path_in_schema; - key_metadata = other214.key_metadata; - __isset = other214.__isset; +EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(const EncryptionWithColumnKey& other241) { + path_in_schema = other241.path_in_schema; + key_metadata = other241.key_metadata; + __isset = other241.__isset; return *this; } -EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other215) noexcept { - path_in_schema = std::move(other215.path_in_schema); - 
key_metadata = std::move(other215.key_metadata); - __isset = other215.__isset; +EncryptionWithColumnKey& EncryptionWithColumnKey::operator=(EncryptionWithColumnKey&& other242) noexcept { + path_in_schema = std::move(other242.path_in_schema); + key_metadata = std::move(other242.key_metadata); + __isset = other242.__isset; return *this; } void EncryptionWithColumnKey::printTo(std::ostream& out) const { @@ -3130,6 +4307,8 @@ void EncryptionWithColumnKey::printTo(std::ostream& out) const { ColumnCryptoMetaData::~ColumnCryptoMetaData() noexcept { } +ColumnCryptoMetaData::ColumnCryptoMetaData() noexcept { +} void ColumnCryptoMetaData::__set_ENCRYPTION_WITH_FOOTER_KEY(const EncryptionWithFooterKey& val) { this->ENCRYPTION_WITH_FOOTER_KEY = val; @@ -3154,26 +4333,39 @@ void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { swap(a.__isset, b.__isset); } -ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other216) { - ENCRYPTION_WITH_FOOTER_KEY = other216.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other216.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other216.__isset; -} -ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other217) noexcept { - ENCRYPTION_WITH_FOOTER_KEY = std::move(other217.ENCRYPTION_WITH_FOOTER_KEY); - ENCRYPTION_WITH_COLUMN_KEY = std::move(other217.ENCRYPTION_WITH_COLUMN_KEY); - __isset = other217.__isset; +bool ColumnCryptoMetaData::operator==(const ColumnCryptoMetaData & rhs) const +{ + if (__isset.ENCRYPTION_WITH_FOOTER_KEY != rhs.__isset.ENCRYPTION_WITH_FOOTER_KEY) + return false; + else if (__isset.ENCRYPTION_WITH_FOOTER_KEY && !(ENCRYPTION_WITH_FOOTER_KEY == rhs.ENCRYPTION_WITH_FOOTER_KEY)) + return false; + if (__isset.ENCRYPTION_WITH_COLUMN_KEY != rhs.__isset.ENCRYPTION_WITH_COLUMN_KEY) + return false; + else if (__isset.ENCRYPTION_WITH_COLUMN_KEY && !(ENCRYPTION_WITH_COLUMN_KEY == rhs.ENCRYPTION_WITH_COLUMN_KEY)) + return false; + return true; +} + 
+ColumnCryptoMetaData::ColumnCryptoMetaData(const ColumnCryptoMetaData& other243) { + ENCRYPTION_WITH_FOOTER_KEY = other243.ENCRYPTION_WITH_FOOTER_KEY; + ENCRYPTION_WITH_COLUMN_KEY = other243.ENCRYPTION_WITH_COLUMN_KEY; + __isset = other243.__isset; +} +ColumnCryptoMetaData::ColumnCryptoMetaData(ColumnCryptoMetaData&& other244) noexcept { + ENCRYPTION_WITH_FOOTER_KEY = std::move(other244.ENCRYPTION_WITH_FOOTER_KEY); + ENCRYPTION_WITH_COLUMN_KEY = std::move(other244.ENCRYPTION_WITH_COLUMN_KEY); + __isset = other244.__isset; } -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other218) { - ENCRYPTION_WITH_FOOTER_KEY = other218.ENCRYPTION_WITH_FOOTER_KEY; - ENCRYPTION_WITH_COLUMN_KEY = other218.ENCRYPTION_WITH_COLUMN_KEY; - __isset = other218.__isset; +ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(const ColumnCryptoMetaData& other245) { + ENCRYPTION_WITH_FOOTER_KEY = other245.ENCRYPTION_WITH_FOOTER_KEY; + ENCRYPTION_WITH_COLUMN_KEY = other245.ENCRYPTION_WITH_COLUMN_KEY; + __isset = other245.__isset; return *this; } -ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other219) noexcept { - ENCRYPTION_WITH_FOOTER_KEY = std::move(other219.ENCRYPTION_WITH_FOOTER_KEY); - ENCRYPTION_WITH_COLUMN_KEY = std::move(other219.ENCRYPTION_WITH_COLUMN_KEY); - __isset = other219.__isset; +ColumnCryptoMetaData& ColumnCryptoMetaData::operator=(ColumnCryptoMetaData&& other246) noexcept { + ENCRYPTION_WITH_FOOTER_KEY = std::move(other246.ENCRYPTION_WITH_FOOTER_KEY); + ENCRYPTION_WITH_COLUMN_KEY = std::move(other246.ENCRYPTION_WITH_COLUMN_KEY); + __isset = other246.__isset; return *this; } void ColumnCryptoMetaData::printTo(std::ostream& out) const { @@ -3188,6 +4380,15 @@ void ColumnCryptoMetaData::printTo(std::ostream& out) const { ColumnChunk::~ColumnChunk() noexcept { } +ColumnChunk::ColumnChunk() noexcept + : file_path(), + file_offset(0LL), + offset_index_offset(0), + offset_index_length(0), + column_index_offset(0), 
+ column_index_length(0), + encrypted_column_metadata() { +} void ColumnChunk::__set_file_path(const std::string& val) { this->file_path = val; @@ -3253,54 +4454,93 @@ void swap(ColumnChunk &a, ColumnChunk &b) { swap(a.__isset, b.__isset); } -ColumnChunk::ColumnChunk(const ColumnChunk& other220) { - file_path = other220.file_path; - file_offset = other220.file_offset; - meta_data = other220.meta_data; - offset_index_offset = other220.offset_index_offset; - offset_index_length = other220.offset_index_length; - column_index_offset = other220.column_index_offset; - column_index_length = other220.column_index_length; - crypto_metadata = other220.crypto_metadata; - encrypted_column_metadata = other220.encrypted_column_metadata; - __isset = other220.__isset; -} -ColumnChunk::ColumnChunk(ColumnChunk&& other221) noexcept { - file_path = std::move(other221.file_path); - file_offset = other221.file_offset; - meta_data = std::move(other221.meta_data); - offset_index_offset = other221.offset_index_offset; - offset_index_length = other221.offset_index_length; - column_index_offset = other221.column_index_offset; - column_index_length = other221.column_index_length; - crypto_metadata = std::move(other221.crypto_metadata); - encrypted_column_metadata = std::move(other221.encrypted_column_metadata); - __isset = other221.__isset; -} -ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other222) { - file_path = other222.file_path; - file_offset = other222.file_offset; - meta_data = other222.meta_data; - offset_index_offset = other222.offset_index_offset; - offset_index_length = other222.offset_index_length; - column_index_offset = other222.column_index_offset; - column_index_length = other222.column_index_length; - crypto_metadata = other222.crypto_metadata; - encrypted_column_metadata = other222.encrypted_column_metadata; - __isset = other222.__isset; +bool ColumnChunk::operator==(const ColumnChunk & rhs) const +{ + if (__isset.file_path != rhs.__isset.file_path) + return false; 
+ else if (__isset.file_path && !(file_path == rhs.file_path)) + return false; + if (!(file_offset == rhs.file_offset)) + return false; + if (__isset.meta_data != rhs.__isset.meta_data) + return false; + else if (__isset.meta_data && !(meta_data == rhs.meta_data)) + return false; + if (__isset.offset_index_offset != rhs.__isset.offset_index_offset) + return false; + else if (__isset.offset_index_offset && !(offset_index_offset == rhs.offset_index_offset)) + return false; + if (__isset.offset_index_length != rhs.__isset.offset_index_length) + return false; + else if (__isset.offset_index_length && !(offset_index_length == rhs.offset_index_length)) + return false; + if (__isset.column_index_offset != rhs.__isset.column_index_offset) + return false; + else if (__isset.column_index_offset && !(column_index_offset == rhs.column_index_offset)) + return false; + if (__isset.column_index_length != rhs.__isset.column_index_length) + return false; + else if (__isset.column_index_length && !(column_index_length == rhs.column_index_length)) + return false; + if (__isset.crypto_metadata != rhs.__isset.crypto_metadata) + return false; + else if (__isset.crypto_metadata && !(crypto_metadata == rhs.crypto_metadata)) + return false; + if (__isset.encrypted_column_metadata != rhs.__isset.encrypted_column_metadata) + return false; + else if (__isset.encrypted_column_metadata && !(encrypted_column_metadata == rhs.encrypted_column_metadata)) + return false; + return true; +} + +ColumnChunk::ColumnChunk(const ColumnChunk& other247) { + file_path = other247.file_path; + file_offset = other247.file_offset; + meta_data = other247.meta_data; + offset_index_offset = other247.offset_index_offset; + offset_index_length = other247.offset_index_length; + column_index_offset = other247.column_index_offset; + column_index_length = other247.column_index_length; + crypto_metadata = other247.crypto_metadata; + encrypted_column_metadata = other247.encrypted_column_metadata; + __isset = 
other247.__isset; +} +ColumnChunk::ColumnChunk(ColumnChunk&& other248) noexcept { + file_path = std::move(other248.file_path); + file_offset = other248.file_offset; + meta_data = std::move(other248.meta_data); + offset_index_offset = other248.offset_index_offset; + offset_index_length = other248.offset_index_length; + column_index_offset = other248.column_index_offset; + column_index_length = other248.column_index_length; + crypto_metadata = std::move(other248.crypto_metadata); + encrypted_column_metadata = std::move(other248.encrypted_column_metadata); + __isset = other248.__isset; +} +ColumnChunk& ColumnChunk::operator=(const ColumnChunk& other249) { + file_path = other249.file_path; + file_offset = other249.file_offset; + meta_data = other249.meta_data; + offset_index_offset = other249.offset_index_offset; + offset_index_length = other249.offset_index_length; + column_index_offset = other249.column_index_offset; + column_index_length = other249.column_index_length; + crypto_metadata = other249.crypto_metadata; + encrypted_column_metadata = other249.encrypted_column_metadata; + __isset = other249.__isset; return *this; } -ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other223) noexcept { - file_path = std::move(other223.file_path); - file_offset = other223.file_offset; - meta_data = std::move(other223.meta_data); - offset_index_offset = other223.offset_index_offset; - offset_index_length = other223.offset_index_length; - column_index_offset = other223.column_index_offset; - column_index_length = other223.column_index_length; - crypto_metadata = std::move(other223.crypto_metadata); - encrypted_column_metadata = std::move(other223.encrypted_column_metadata); - __isset = other223.__isset; +ColumnChunk& ColumnChunk::operator=(ColumnChunk&& other250) noexcept { + file_path = std::move(other250.file_path); + file_offset = other250.file_offset; + meta_data = std::move(other250.meta_data); + offset_index_offset = other250.offset_index_offset; + offset_index_length = 
other250.offset_index_length; + column_index_offset = other250.column_index_offset; + column_index_length = other250.column_index_length; + crypto_metadata = std::move(other250.crypto_metadata); + encrypted_column_metadata = std::move(other250.encrypted_column_metadata); + __isset = other250.__isset; return *this; } void ColumnChunk::printTo(std::ostream& out) const { @@ -3322,6 +4562,13 @@ void ColumnChunk::printTo(std::ostream& out) const { RowGroup::~RowGroup() noexcept { } +RowGroup::RowGroup() noexcept + : total_byte_size(0), + num_rows(0), + file_offset(0), + total_compressed_size(0), + ordinal(0) { +} void RowGroup::__set_columns(const std::vector & val) { this->columns = val; @@ -3373,46 +4620,73 @@ void swap(RowGroup &a, RowGroup &b) { swap(a.__isset, b.__isset); } -RowGroup::RowGroup(const RowGroup& other236) { - columns = other236.columns; - total_byte_size = other236.total_byte_size; - num_rows = other236.num_rows; - sorting_columns = other236.sorting_columns; - file_offset = other236.file_offset; - total_compressed_size = other236.total_compressed_size; - ordinal = other236.ordinal; - __isset = other236.__isset; -} -RowGroup::RowGroup(RowGroup&& other237) noexcept { - columns = std::move(other237.columns); - total_byte_size = other237.total_byte_size; - num_rows = other237.num_rows; - sorting_columns = std::move(other237.sorting_columns); - file_offset = other237.file_offset; - total_compressed_size = other237.total_compressed_size; - ordinal = other237.ordinal; - __isset = other237.__isset; -} -RowGroup& RowGroup::operator=(const RowGroup& other238) { - columns = other238.columns; - total_byte_size = other238.total_byte_size; - num_rows = other238.num_rows; - sorting_columns = other238.sorting_columns; - file_offset = other238.file_offset; - total_compressed_size = other238.total_compressed_size; - ordinal = other238.ordinal; - __isset = other238.__isset; +bool RowGroup::operator==(const RowGroup & rhs) const +{ + if (!(columns == rhs.columns)) + 
return false; + if (!(total_byte_size == rhs.total_byte_size)) + return false; + if (!(num_rows == rhs.num_rows)) + return false; + if (__isset.sorting_columns != rhs.__isset.sorting_columns) + return false; + else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns)) + return false; + if (__isset.file_offset != rhs.__isset.file_offset) + return false; + else if (__isset.file_offset && !(file_offset == rhs.file_offset)) + return false; + if (__isset.total_compressed_size != rhs.__isset.total_compressed_size) + return false; + else if (__isset.total_compressed_size && !(total_compressed_size == rhs.total_compressed_size)) + return false; + if (__isset.ordinal != rhs.__isset.ordinal) + return false; + else if (__isset.ordinal && !(ordinal == rhs.ordinal)) + return false; + return true; +} + +RowGroup::RowGroup(const RowGroup& other263) { + columns = other263.columns; + total_byte_size = other263.total_byte_size; + num_rows = other263.num_rows; + sorting_columns = other263.sorting_columns; + file_offset = other263.file_offset; + total_compressed_size = other263.total_compressed_size; + ordinal = other263.ordinal; + __isset = other263.__isset; +} +RowGroup::RowGroup(RowGroup&& other264) noexcept { + columns = std::move(other264.columns); + total_byte_size = other264.total_byte_size; + num_rows = other264.num_rows; + sorting_columns = std::move(other264.sorting_columns); + file_offset = other264.file_offset; + total_compressed_size = other264.total_compressed_size; + ordinal = other264.ordinal; + __isset = other264.__isset; +} +RowGroup& RowGroup::operator=(const RowGroup& other265) { + columns = other265.columns; + total_byte_size = other265.total_byte_size; + num_rows = other265.num_rows; + sorting_columns = other265.sorting_columns; + file_offset = other265.file_offset; + total_compressed_size = other265.total_compressed_size; + ordinal = other265.ordinal; + __isset = other265.__isset; return *this; } -RowGroup& RowGroup::operator=(RowGroup&& 
other239) noexcept { - columns = std::move(other239.columns); - total_byte_size = other239.total_byte_size; - num_rows = other239.num_rows; - sorting_columns = std::move(other239.sorting_columns); - file_offset = other239.file_offset; - total_compressed_size = other239.total_compressed_size; - ordinal = other239.ordinal; - __isset = other239.__isset; +RowGroup& RowGroup::operator=(RowGroup&& other266) noexcept { + columns = std::move(other266.columns); + total_byte_size = other266.total_byte_size; + num_rows = other266.num_rows; + sorting_columns = std::move(other266.sorting_columns); + file_offset = other266.file_offset; + total_compressed_size = other266.total_compressed_size; + ordinal = other266.ordinal; + __isset = other266.__isset; return *this; } void RowGroup::printTo(std::ostream& out) const { @@ -3432,6 +4706,8 @@ void RowGroup::printTo(std::ostream& out) const { TypeDefinedOrder::~TypeDefinedOrder() noexcept { } +TypeDefinedOrder::TypeDefinedOrder() noexcept { +} std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj) { obj.printTo(out); @@ -3445,18 +4721,23 @@ void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { (void) b; } -TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other240) noexcept { - (void) other240; +bool TypeDefinedOrder::operator==(const TypeDefinedOrder & /* rhs */) const +{ + return true; +} + +TypeDefinedOrder::TypeDefinedOrder(const TypeDefinedOrder& other267) noexcept { + (void) other267; } -TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other241) noexcept { - (void) other241; +TypeDefinedOrder::TypeDefinedOrder(TypeDefinedOrder&& other268) noexcept { + (void) other268; } -TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other242) noexcept { - (void) other242; +TypeDefinedOrder& TypeDefinedOrder::operator=(const TypeDefinedOrder& other269) noexcept { + (void) other269; return *this; } -TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other243) noexcept { - (void) 
other243; +TypeDefinedOrder& TypeDefinedOrder::operator=(TypeDefinedOrder&& other270) noexcept { + (void) other270; return *this; } void TypeDefinedOrder::printTo(std::ostream& out) const { @@ -3469,6 +4750,8 @@ void TypeDefinedOrder::printTo(std::ostream& out) const { ColumnOrder::~ColumnOrder() noexcept { } +ColumnOrder::ColumnOrder() noexcept { +} void ColumnOrder::__set_TYPE_ORDER(const TypeDefinedOrder& val) { this->TYPE_ORDER = val; @@ -3487,22 +4770,31 @@ void swap(ColumnOrder &a, ColumnOrder &b) { swap(a.__isset, b.__isset); } -ColumnOrder::ColumnOrder(const ColumnOrder& other244) noexcept { - TYPE_ORDER = other244.TYPE_ORDER; - __isset = other244.__isset; +bool ColumnOrder::operator==(const ColumnOrder & rhs) const +{ + if (__isset.TYPE_ORDER != rhs.__isset.TYPE_ORDER) + return false; + else if (__isset.TYPE_ORDER && !(TYPE_ORDER == rhs.TYPE_ORDER)) + return false; + return true; } -ColumnOrder::ColumnOrder(ColumnOrder&& other245) noexcept { - TYPE_ORDER = std::move(other245.TYPE_ORDER); - __isset = other245.__isset; + +ColumnOrder::ColumnOrder(const ColumnOrder& other271) noexcept { + TYPE_ORDER = other271.TYPE_ORDER; + __isset = other271.__isset; } -ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other246) noexcept { - TYPE_ORDER = other246.TYPE_ORDER; - __isset = other246.__isset; +ColumnOrder::ColumnOrder(ColumnOrder&& other272) noexcept { + TYPE_ORDER = std::move(other272.TYPE_ORDER); + __isset = other272.__isset; +} +ColumnOrder& ColumnOrder::operator=(const ColumnOrder& other273) noexcept { + TYPE_ORDER = other273.TYPE_ORDER; + __isset = other273.__isset; return *this; } -ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other247) noexcept { - TYPE_ORDER = std::move(other247.TYPE_ORDER); - __isset = other247.__isset; +ColumnOrder& ColumnOrder::operator=(ColumnOrder&& other274) noexcept { + TYPE_ORDER = std::move(other274.TYPE_ORDER); + __isset = other274.__isset; return *this; } void ColumnOrder::printTo(std::ostream& out) const { @@ -3516,6 
+4808,11 @@ void ColumnOrder::printTo(std::ostream& out) const { PageLocation::~PageLocation() noexcept { } +PageLocation::PageLocation() noexcept + : offset(0), + compressed_page_size(0), + first_row_index(0) { +} void PageLocation::__set_offset(const int64_t val) { this->offset = val; @@ -3542,26 +4839,37 @@ void swap(PageLocation &a, PageLocation &b) { swap(a.first_row_index, b.first_row_index); } -PageLocation::PageLocation(const PageLocation& other248) noexcept { - offset = other248.offset; - compressed_page_size = other248.compressed_page_size; - first_row_index = other248.first_row_index; -} -PageLocation::PageLocation(PageLocation&& other249) noexcept { - offset = other249.offset; - compressed_page_size = other249.compressed_page_size; - first_row_index = other249.first_row_index; -} -PageLocation& PageLocation::operator=(const PageLocation& other250) noexcept { - offset = other250.offset; - compressed_page_size = other250.compressed_page_size; - first_row_index = other250.first_row_index; +bool PageLocation::operator==(const PageLocation & rhs) const +{ + if (!(offset == rhs.offset)) + return false; + if (!(compressed_page_size == rhs.compressed_page_size)) + return false; + if (!(first_row_index == rhs.first_row_index)) + return false; + return true; +} + +PageLocation::PageLocation(const PageLocation& other275) noexcept { + offset = other275.offset; + compressed_page_size = other275.compressed_page_size; + first_row_index = other275.first_row_index; +} +PageLocation::PageLocation(PageLocation&& other276) noexcept { + offset = other276.offset; + compressed_page_size = other276.compressed_page_size; + first_row_index = other276.first_row_index; +} +PageLocation& PageLocation::operator=(const PageLocation& other277) noexcept { + offset = other277.offset; + compressed_page_size = other277.compressed_page_size; + first_row_index = other277.first_row_index; return *this; } -PageLocation& PageLocation::operator=(PageLocation&& other251) noexcept { - offset = 
other251.offset; - compressed_page_size = other251.compressed_page_size; - first_row_index = other251.first_row_index; +PageLocation& PageLocation::operator=(PageLocation&& other278) noexcept { + offset = other278.offset; + compressed_page_size = other278.compressed_page_size; + first_row_index = other278.first_row_index; return *this; } void PageLocation::printTo(std::ostream& out) const { @@ -3577,6 +4885,8 @@ void PageLocation::printTo(std::ostream& out) const { OffsetIndex::~OffsetIndex() noexcept { } +OffsetIndex::OffsetIndex() noexcept { +} void OffsetIndex::__set_page_locations(const std::vector<PageLocation> & val) { this->page_locations = val; @@ -3600,26 +4910,37 @@ void swap(OffsetIndex &a, OffsetIndex &b) { swap(a.__isset, b.__isset); } -OffsetIndex::OffsetIndex(const OffsetIndex& other264) { - page_locations = other264.page_locations; - unencoded_byte_array_data_bytes = other264.unencoded_byte_array_data_bytes; - __isset = other264.__isset; -} -OffsetIndex::OffsetIndex(OffsetIndex&& other265) noexcept { - page_locations = std::move(other265.page_locations); - unencoded_byte_array_data_bytes = std::move(other265.unencoded_byte_array_data_bytes); - __isset = other265.__isset; -} -OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other266) { - page_locations = other266.page_locations; - unencoded_byte_array_data_bytes = other266.unencoded_byte_array_data_bytes; - __isset = other266.__isset; +bool OffsetIndex::operator==(const OffsetIndex & rhs) const +{ + if (!(page_locations == rhs.page_locations)) + return false; + if (__isset.unencoded_byte_array_data_bytes != rhs.__isset.unencoded_byte_array_data_bytes) + return false; + else if (__isset.unencoded_byte_array_data_bytes && !(unencoded_byte_array_data_bytes == rhs.unencoded_byte_array_data_bytes)) + return false; + return true; +} + +OffsetIndex::OffsetIndex(const OffsetIndex& other291) { + page_locations = other291.page_locations; + unencoded_byte_array_data_bytes = other291.unencoded_byte_array_data_bytes; + 
__isset = other291.__isset; +} +OffsetIndex::OffsetIndex(OffsetIndex&& other292) noexcept { + page_locations = std::move(other292.page_locations); + unencoded_byte_array_data_bytes = std::move(other292.unencoded_byte_array_data_bytes); + __isset = other292.__isset; +} +OffsetIndex& OffsetIndex::operator=(const OffsetIndex& other293) { + page_locations = other293.page_locations; + unencoded_byte_array_data_bytes = other293.unencoded_byte_array_data_bytes; + __isset = other293.__isset; return *this; } -OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other267) noexcept { - page_locations = std::move(other267.page_locations); - unencoded_byte_array_data_bytes = std::move(other267.unencoded_byte_array_data_bytes); - __isset = other267.__isset; +OffsetIndex& OffsetIndex::operator=(OffsetIndex&& other294) noexcept { + page_locations = std::move(other294.page_locations); + unencoded_byte_array_data_bytes = std::move(other294.unencoded_byte_array_data_bytes); + __isset = other294.__isset; return *this; } void OffsetIndex::printTo(std::ostream& out) const { @@ -3634,6 +4955,9 @@ void OffsetIndex::printTo(std::ostream& out) const { ColumnIndex::~ColumnIndex() noexcept { } +ColumnIndex::ColumnIndex() noexcept + : boundary_order(static_cast<BoundaryOrder::type>(0)) { +} void ColumnIndex::__set_null_pages(const std::vector<bool> & val) { this->null_pages = val; @@ -3684,46 +5008,71 @@ void swap(ColumnIndex &a, ColumnIndex &b) { swap(a.__isset, b.__isset); } -ColumnIndex::ColumnIndex(const ColumnIndex& other305) { - null_pages = other305.null_pages; - min_values = other305.min_values; - max_values = other305.max_values; - boundary_order = other305.boundary_order; - null_counts = other305.null_counts; - repetition_level_histograms = other305.repetition_level_histograms; - definition_level_histograms = other305.definition_level_histograms; - __isset = other305.__isset; -} -ColumnIndex::ColumnIndex(ColumnIndex&& other306) noexcept { - null_pages = std::move(other306.null_pages); - min_values = 
std::move(other306.min_values); - max_values = std::move(other306.max_values); - boundary_order = other306.boundary_order; - null_counts = std::move(other306.null_counts); - repetition_level_histograms = std::move(other306.repetition_level_histograms); - definition_level_histograms = std::move(other306.definition_level_histograms); - __isset = other306.__isset; -} -ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other307) { - null_pages = other307.null_pages; - min_values = other307.min_values; - max_values = other307.max_values; - boundary_order = other307.boundary_order; - null_counts = other307.null_counts; - repetition_level_histograms = other307.repetition_level_histograms; - definition_level_histograms = other307.definition_level_histograms; - __isset = other307.__isset; +bool ColumnIndex::operator==(const ColumnIndex & rhs) const +{ + if (!(null_pages == rhs.null_pages)) + return false; + if (!(min_values == rhs.min_values)) + return false; + if (!(max_values == rhs.max_values)) + return false; + if (!(boundary_order == rhs.boundary_order)) + return false; + if (__isset.null_counts != rhs.__isset.null_counts) + return false; + else if (__isset.null_counts && !(null_counts == rhs.null_counts)) + return false; + if (__isset.repetition_level_histograms != rhs.__isset.repetition_level_histograms) + return false; + else if (__isset.repetition_level_histograms && !(repetition_level_histograms == rhs.repetition_level_histograms)) + return false; + if (__isset.definition_level_histograms != rhs.__isset.definition_level_histograms) + return false; + else if (__isset.definition_level_histograms && !(definition_level_histograms == rhs.definition_level_histograms)) + return false; + return true; +} + +ColumnIndex::ColumnIndex(const ColumnIndex& other332) { + null_pages = other332.null_pages; + min_values = other332.min_values; + max_values = other332.max_values; + boundary_order = other332.boundary_order; + null_counts = other332.null_counts; + 
repetition_level_histograms = other332.repetition_level_histograms; + definition_level_histograms = other332.definition_level_histograms; + __isset = other332.__isset; +} +ColumnIndex::ColumnIndex(ColumnIndex&& other333) noexcept { + null_pages = std::move(other333.null_pages); + min_values = std::move(other333.min_values); + max_values = std::move(other333.max_values); + boundary_order = other333.boundary_order; + null_counts = std::move(other333.null_counts); + repetition_level_histograms = std::move(other333.repetition_level_histograms); + definition_level_histograms = std::move(other333.definition_level_histograms); + __isset = other333.__isset; +} +ColumnIndex& ColumnIndex::operator=(const ColumnIndex& other334) { + null_pages = other334.null_pages; + min_values = other334.min_values; + max_values = other334.max_values; + boundary_order = other334.boundary_order; + null_counts = other334.null_counts; + repetition_level_histograms = other334.repetition_level_histograms; + definition_level_histograms = other334.definition_level_histograms; + __isset = other334.__isset; return *this; } -ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other308) noexcept { - null_pages = std::move(other308.null_pages); - min_values = std::move(other308.min_values); - max_values = std::move(other308.max_values); - boundary_order = other308.boundary_order; - null_counts = std::move(other308.null_counts); - repetition_level_histograms = std::move(other308.repetition_level_histograms); - definition_level_histograms = std::move(other308.definition_level_histograms); - __isset = other308.__isset; +ColumnIndex& ColumnIndex::operator=(ColumnIndex&& other335) noexcept { + null_pages = std::move(other335.null_pages); + min_values = std::move(other335.min_values); + max_values = std::move(other335.max_values); + boundary_order = other335.boundary_order; + null_counts = std::move(other335.null_counts); + repetition_level_histograms = std::move(other335.repetition_level_histograms); + 
definition_level_histograms = std::move(other335.definition_level_histograms); + __isset = other335.__isset; return *this; } void ColumnIndex::printTo(std::ostream& out) const { @@ -3743,6 +5092,11 @@ void ColumnIndex::printTo(std::ostream& out) const { AesGcmV1::~AesGcmV1() noexcept { } +AesGcmV1::AesGcmV1() noexcept + : aad_prefix(), + aad_file_unique(), + supply_aad_prefix(0) { +} void AesGcmV1::__set_aad_prefix(const std::string& val) { this->aad_prefix = val; @@ -3773,30 +5127,47 @@ void swap(AesGcmV1 &a, AesGcmV1 &b) { swap(a.__isset, b.__isset); } -AesGcmV1::AesGcmV1(const AesGcmV1& other309) { - aad_prefix = other309.aad_prefix; - aad_file_unique = other309.aad_file_unique; - supply_aad_prefix = other309.supply_aad_prefix; - __isset = other309.__isset; -} -AesGcmV1::AesGcmV1(AesGcmV1&& other310) noexcept { - aad_prefix = std::move(other310.aad_prefix); - aad_file_unique = std::move(other310.aad_file_unique); - supply_aad_prefix = other310.supply_aad_prefix; - __isset = other310.__isset; -} -AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other311) { - aad_prefix = other311.aad_prefix; - aad_file_unique = other311.aad_file_unique; - supply_aad_prefix = other311.supply_aad_prefix; - __isset = other311.__isset; +bool AesGcmV1::operator==(const AesGcmV1 & rhs) const +{ + if (__isset.aad_prefix != rhs.__isset.aad_prefix) + return false; + else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) + return false; + if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) + return false; + else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) + return false; + if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) + return false; + else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix)) + return false; + return true; +} + +AesGcmV1::AesGcmV1(const AesGcmV1& other336) { + aad_prefix = other336.aad_prefix; + aad_file_unique = other336.aad_file_unique; + supply_aad_prefix = 
other336.supply_aad_prefix; + __isset = other336.__isset; +} +AesGcmV1::AesGcmV1(AesGcmV1&& other337) noexcept { + aad_prefix = std::move(other337.aad_prefix); + aad_file_unique = std::move(other337.aad_file_unique); + supply_aad_prefix = other337.supply_aad_prefix; + __isset = other337.__isset; +} +AesGcmV1& AesGcmV1::operator=(const AesGcmV1& other338) { + aad_prefix = other338.aad_prefix; + aad_file_unique = other338.aad_file_unique; + supply_aad_prefix = other338.supply_aad_prefix; + __isset = other338.__isset; return *this; } -AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other312) noexcept { - aad_prefix = std::move(other312.aad_prefix); - aad_file_unique = std::move(other312.aad_file_unique); - supply_aad_prefix = other312.supply_aad_prefix; - __isset = other312.__isset; +AesGcmV1& AesGcmV1::operator=(AesGcmV1&& other339) noexcept { + aad_prefix = std::move(other339.aad_prefix); + aad_file_unique = std::move(other339.aad_file_unique); + supply_aad_prefix = other339.supply_aad_prefix; + __isset = other339.__isset; return *this; } void AesGcmV1::printTo(std::ostream& out) const { @@ -3812,6 +5183,11 @@ void AesGcmV1::printTo(std::ostream& out) const { AesGcmCtrV1::~AesGcmCtrV1() noexcept { } +AesGcmCtrV1::AesGcmCtrV1() noexcept + : aad_prefix(), + aad_file_unique(), + supply_aad_prefix(0) { +} void AesGcmCtrV1::__set_aad_prefix(const std::string& val) { this->aad_prefix = val; @@ -3842,30 +5218,47 @@ void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { swap(a.__isset, b.__isset); } -AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other313) { - aad_prefix = other313.aad_prefix; - aad_file_unique = other313.aad_file_unique; - supply_aad_prefix = other313.supply_aad_prefix; - __isset = other313.__isset; -} -AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other314) noexcept { - aad_prefix = std::move(other314.aad_prefix); - aad_file_unique = std::move(other314.aad_file_unique); - supply_aad_prefix = other314.supply_aad_prefix; - __isset = other314.__isset; -} -AesGcmCtrV1& 
AesGcmCtrV1::operator=(const AesGcmCtrV1& other315) { - aad_prefix = other315.aad_prefix; - aad_file_unique = other315.aad_file_unique; - supply_aad_prefix = other315.supply_aad_prefix; - __isset = other315.__isset; +bool AesGcmCtrV1::operator==(const AesGcmCtrV1 & rhs) const +{ + if (__isset.aad_prefix != rhs.__isset.aad_prefix) + return false; + else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) + return false; + if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) + return false; + else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) + return false; + if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) + return false; + else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix)) + return false; + return true; +} + +AesGcmCtrV1::AesGcmCtrV1(const AesGcmCtrV1& other340) { + aad_prefix = other340.aad_prefix; + aad_file_unique = other340.aad_file_unique; + supply_aad_prefix = other340.supply_aad_prefix; + __isset = other340.__isset; +} +AesGcmCtrV1::AesGcmCtrV1(AesGcmCtrV1&& other341) noexcept { + aad_prefix = std::move(other341.aad_prefix); + aad_file_unique = std::move(other341.aad_file_unique); + supply_aad_prefix = other341.supply_aad_prefix; + __isset = other341.__isset; +} +AesGcmCtrV1& AesGcmCtrV1::operator=(const AesGcmCtrV1& other342) { + aad_prefix = other342.aad_prefix; + aad_file_unique = other342.aad_file_unique; + supply_aad_prefix = other342.supply_aad_prefix; + __isset = other342.__isset; return *this; } -AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other316) noexcept { - aad_prefix = std::move(other316.aad_prefix); - aad_file_unique = std::move(other316.aad_file_unique); - supply_aad_prefix = other316.supply_aad_prefix; - __isset = other316.__isset; +AesGcmCtrV1& AesGcmCtrV1::operator=(AesGcmCtrV1&& other343) noexcept { + aad_prefix = std::move(other343.aad_prefix); + aad_file_unique = std::move(other343.aad_file_unique); + supply_aad_prefix = 
other343.supply_aad_prefix; + __isset = other343.__isset; return *this; } void AesGcmCtrV1::printTo(std::ostream& out) const { @@ -3881,6 +5274,8 @@ void AesGcmCtrV1::printTo(std::ostream& out) const { EncryptionAlgorithm::~EncryptionAlgorithm() noexcept { } +EncryptionAlgorithm::EncryptionAlgorithm() noexcept { +} void EncryptionAlgorithm::__set_AES_GCM_V1(const AesGcmV1& val) { this->AES_GCM_V1 = val; @@ -3905,26 +5300,39 @@ void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { swap(a.__isset, b.__isset); } -EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other317) { - AES_GCM_V1 = other317.AES_GCM_V1; - AES_GCM_CTR_V1 = other317.AES_GCM_CTR_V1; - __isset = other317.__isset; -} -EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other318) noexcept { - AES_GCM_V1 = std::move(other318.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other318.AES_GCM_CTR_V1); - __isset = other318.__isset; +bool EncryptionAlgorithm::operator==(const EncryptionAlgorithm & rhs) const +{ + if (__isset.AES_GCM_V1 != rhs.__isset.AES_GCM_V1) + return false; + else if (__isset.AES_GCM_V1 && !(AES_GCM_V1 == rhs.AES_GCM_V1)) + return false; + if (__isset.AES_GCM_CTR_V1 != rhs.__isset.AES_GCM_CTR_V1) + return false; + else if (__isset.AES_GCM_CTR_V1 && !(AES_GCM_CTR_V1 == rhs.AES_GCM_CTR_V1)) + return false; + return true; +} + +EncryptionAlgorithm::EncryptionAlgorithm(const EncryptionAlgorithm& other344) { + AES_GCM_V1 = other344.AES_GCM_V1; + AES_GCM_CTR_V1 = other344.AES_GCM_CTR_V1; + __isset = other344.__isset; +} +EncryptionAlgorithm::EncryptionAlgorithm(EncryptionAlgorithm&& other345) noexcept { + AES_GCM_V1 = std::move(other345.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other345.AES_GCM_CTR_V1); + __isset = other345.__isset; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other319) { - AES_GCM_V1 = other319.AES_GCM_V1; - AES_GCM_CTR_V1 = other319.AES_GCM_CTR_V1; - __isset = other319.__isset; +EncryptionAlgorithm& 
EncryptionAlgorithm::operator=(const EncryptionAlgorithm& other346) { + AES_GCM_V1 = other346.AES_GCM_V1; + AES_GCM_CTR_V1 = other346.AES_GCM_CTR_V1; + __isset = other346.__isset; return *this; } -EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other320) noexcept { - AES_GCM_V1 = std::move(other320.AES_GCM_V1); - AES_GCM_CTR_V1 = std::move(other320.AES_GCM_CTR_V1); - __isset = other320.__isset; +EncryptionAlgorithm& EncryptionAlgorithm::operator=(EncryptionAlgorithm&& other347) noexcept { + AES_GCM_V1 = std::move(other347.AES_GCM_V1); + AES_GCM_CTR_V1 = std::move(other347.AES_GCM_CTR_V1); + __isset = other347.__isset; return *this; } void EncryptionAlgorithm::printTo(std::ostream& out) const { @@ -3939,6 +5347,12 @@ void EncryptionAlgorithm::printTo(std::ostream& out) const { FileMetaData::~FileMetaData() noexcept { } +FileMetaData::FileMetaData() noexcept + : version(0), + num_rows(0), + created_by(), + footer_signing_key_metadata() { +} void FileMetaData::__set_version(const int32_t val) { this->version = val; @@ -4001,54 +5415,87 @@ void swap(FileMetaData &a, FileMetaData &b) { swap(a.__isset, b.__isset); } -FileMetaData::FileMetaData(const FileMetaData& other345) { - version = other345.version; - schema = other345.schema; - num_rows = other345.num_rows; - row_groups = other345.row_groups; - key_value_metadata = other345.key_value_metadata; - created_by = other345.created_by; - column_orders = other345.column_orders; - encryption_algorithm = other345.encryption_algorithm; - footer_signing_key_metadata = other345.footer_signing_key_metadata; - __isset = other345.__isset; -} -FileMetaData::FileMetaData(FileMetaData&& other346) noexcept { - version = other346.version; - schema = std::move(other346.schema); - num_rows = other346.num_rows; - row_groups = std::move(other346.row_groups); - key_value_metadata = std::move(other346.key_value_metadata); - created_by = std::move(other346.created_by); - column_orders = 
std::move(other346.column_orders); - encryption_algorithm = std::move(other346.encryption_algorithm); - footer_signing_key_metadata = std::move(other346.footer_signing_key_metadata); - __isset = other346.__isset; -} -FileMetaData& FileMetaData::operator=(const FileMetaData& other347) { - version = other347.version; - schema = other347.schema; - num_rows = other347.num_rows; - row_groups = other347.row_groups; - key_value_metadata = other347.key_value_metadata; - created_by = other347.created_by; - column_orders = other347.column_orders; - encryption_algorithm = other347.encryption_algorithm; - footer_signing_key_metadata = other347.footer_signing_key_metadata; - __isset = other347.__isset; +bool FileMetaData::operator==(const FileMetaData & rhs) const +{ + if (!(version == rhs.version)) + return false; + if (!(schema == rhs.schema)) + return false; + if (!(num_rows == rhs.num_rows)) + return false; + if (!(row_groups == rhs.row_groups)) + return false; + if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) + return false; + else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) + return false; + if (__isset.created_by != rhs.__isset.created_by) + return false; + else if (__isset.created_by && !(created_by == rhs.created_by)) + return false; + if (__isset.column_orders != rhs.__isset.column_orders) + return false; + else if (__isset.column_orders && !(column_orders == rhs.column_orders)) + return false; + if (__isset.encryption_algorithm != rhs.__isset.encryption_algorithm) + return false; + else if (__isset.encryption_algorithm && !(encryption_algorithm == rhs.encryption_algorithm)) + return false; + if (__isset.footer_signing_key_metadata != rhs.__isset.footer_signing_key_metadata) + return false; + else if (__isset.footer_signing_key_metadata && !(footer_signing_key_metadata == rhs.footer_signing_key_metadata)) + return false; + return true; +} + +FileMetaData::FileMetaData(const FileMetaData& other372) { + version = 
other372.version; + schema = other372.schema; + num_rows = other372.num_rows; + row_groups = other372.row_groups; + key_value_metadata = other372.key_value_metadata; + created_by = other372.created_by; + column_orders = other372.column_orders; + encryption_algorithm = other372.encryption_algorithm; + footer_signing_key_metadata = other372.footer_signing_key_metadata; + __isset = other372.__isset; +} +FileMetaData::FileMetaData(FileMetaData&& other373) noexcept { + version = other373.version; + schema = std::move(other373.schema); + num_rows = other373.num_rows; + row_groups = std::move(other373.row_groups); + key_value_metadata = std::move(other373.key_value_metadata); + created_by = std::move(other373.created_by); + column_orders = std::move(other373.column_orders); + encryption_algorithm = std::move(other373.encryption_algorithm); + footer_signing_key_metadata = std::move(other373.footer_signing_key_metadata); + __isset = other373.__isset; +} +FileMetaData& FileMetaData::operator=(const FileMetaData& other374) { + version = other374.version; + schema = other374.schema; + num_rows = other374.num_rows; + row_groups = other374.row_groups; + key_value_metadata = other374.key_value_metadata; + created_by = other374.created_by; + column_orders = other374.column_orders; + encryption_algorithm = other374.encryption_algorithm; + footer_signing_key_metadata = other374.footer_signing_key_metadata; + __isset = other374.__isset; return *this; } -FileMetaData& FileMetaData::operator=(FileMetaData&& other348) noexcept { - version = other348.version; - schema = std::move(other348.schema); - num_rows = other348.num_rows; - row_groups = std::move(other348.row_groups); - key_value_metadata = std::move(other348.key_value_metadata); - created_by = std::move(other348.created_by); - column_orders = std::move(other348.column_orders); - encryption_algorithm = std::move(other348.encryption_algorithm); - footer_signing_key_metadata = std::move(other348.footer_signing_key_metadata); - 
__isset = other348.__isset; +FileMetaData& FileMetaData::operator=(FileMetaData&& other375) noexcept { + version = other375.version; + schema = std::move(other375.schema); + num_rows = other375.num_rows; + row_groups = std::move(other375.row_groups); + key_value_metadata = std::move(other375.key_value_metadata); + created_by = std::move(other375.created_by); + column_orders = std::move(other375.column_orders); + encryption_algorithm = std::move(other375.encryption_algorithm); + footer_signing_key_metadata = std::move(other375.footer_signing_key_metadata); + __isset = other375.__isset; return *this; } void FileMetaData::printTo(std::ostream& out) const { @@ -4070,6 +5517,9 @@ void FileMetaData::printTo(std::ostream& out) const { FileCryptoMetaData::~FileCryptoMetaData() noexcept { } +FileCryptoMetaData::FileCryptoMetaData() noexcept + : key_metadata() { +} void FileCryptoMetaData::__set_encryption_algorithm(const EncryptionAlgorithm& val) { this->encryption_algorithm = val; @@ -4093,26 +5543,37 @@ void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { swap(a.__isset, b.__isset); } -FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other349) { - encryption_algorithm = other349.encryption_algorithm; - key_metadata = other349.key_metadata; - __isset = other349.__isset; -} -FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other350) noexcept { - encryption_algorithm = std::move(other350.encryption_algorithm); - key_metadata = std::move(other350.key_metadata); - __isset = other350.__isset; -} -FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other351) { - encryption_algorithm = other351.encryption_algorithm; - key_metadata = other351.key_metadata; - __isset = other351.__isset; +bool FileCryptoMetaData::operator==(const FileCryptoMetaData & rhs) const +{ + if (!(encryption_algorithm == rhs.encryption_algorithm)) + return false; + if (__isset.key_metadata != rhs.__isset.key_metadata) + return false; + else if 
(__isset.key_metadata && !(key_metadata == rhs.key_metadata)) + return false; + return true; +} + +FileCryptoMetaData::FileCryptoMetaData(const FileCryptoMetaData& other376) { + encryption_algorithm = other376.encryption_algorithm; + key_metadata = other376.key_metadata; + __isset = other376.__isset; +} +FileCryptoMetaData::FileCryptoMetaData(FileCryptoMetaData&& other377) noexcept { + encryption_algorithm = std::move(other377.encryption_algorithm); + key_metadata = std::move(other377.key_metadata); + __isset = other377.__isset; +} +FileCryptoMetaData& FileCryptoMetaData::operator=(const FileCryptoMetaData& other378) { + encryption_algorithm = other378.encryption_algorithm; + key_metadata = other378.key_metadata; + __isset = other378.__isset; return *this; } -FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other352) noexcept { - encryption_algorithm = std::move(other352.encryption_algorithm); - key_metadata = std::move(other352.key_metadata); - __isset = other352.__isset; +FileCryptoMetaData& FileCryptoMetaData::operator=(FileCryptoMetaData&& other379) noexcept { + encryption_algorithm = std::move(other379.encryption_algorithm); + key_metadata = std::move(other379.key_metadata); + __isset = other379.__isset; return *this; } void FileCryptoMetaData::printTo(std::ostream& out) const { diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h index 6cf85fe5e73cc..0ccaf89224599 100644 --- a/cpp/src/generated/parquet_types.h +++ b/cpp/src/generated/parquet_types.h @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.19.0) + * Autogenerated by Thrift Compiler (0.21.0) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -50,7 +50,7 @@ std::string to_string(const Type::type& val); /** * DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet. * ConvertedType is superseded by LogicalType. This enum should not be extended. 
- * + * * See LogicalTypes.md for conversion between ConvertedType and LogicalType. */ struct ConvertedType { @@ -73,17 +73,17 @@ struct ConvertedType { */ LIST = 3, /** - * an enum is converted into a binary field + * an enum is converted into a BYTE_ARRAY field */ ENUM = 4, /** * A decimal value. - * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's + * + * This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive + * types. The underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). The value of the decimal is the value * 10^{-scale}. - * + * * This must be accompanied by a (maximum) precision and a scale in the * SchemaElement. The precision specifies the number of digits in the decimal * and the scale stores the location of the decimal point. For example 1.23 @@ -93,47 +93,47 @@ struct ConvertedType { DECIMAL = 5, /** * A Date - * + * * Stored as days since Unix epoch, encoded as the INT32 physical type. - * + * */ DATE = 6, /** * A time - * + * * The total number of milliseconds since midnight. The value is stored * as an INT32 physical type. */ TIME_MILLIS = 7, /** * A time. - * + * * The total number of microseconds since midnight. The value is stored as * an INT64 physical type. */ TIME_MICROS = 8, /** * A date/time combination - * + * * Date and time recorded as milliseconds since the Unix epoch. Recorded as * a physical type of INT64. */ TIMESTAMP_MILLIS = 9, /** * A date/time combination - * + * * Date and time recorded as microseconds since the Unix epoch. The value is * stored as an INT64 physical type. */ TIMESTAMP_MICROS = 10, /** * An unsigned integer value. - * + * * The number describes the maximum number of meaningful data bits in * the stored value. 8, 16 and 32 bit values are stored using the * INT32 physical type. 
64 bit values are stored using the INT64 * physical type. - * + * */ UINT_8 = 11, UINT_16 = 12, @@ -141,12 +141,12 @@ struct ConvertedType { UINT_64 = 14, /** * A signed integer value. - * + * * The number describes the maximum number of meaningful data bits in * the stored value. 8, 16 and 32 bit values are stored using the * INT32 physical type. 64 bit values are stored using the INT64 * physical type. - * + * */ INT_8 = 15, INT_16 = 16, @@ -154,19 +154,19 @@ struct ConvertedType { INT_64 = 18, /** * An embedded JSON document - * + * * A JSON document embedded within a single UTF8 column. */ JSON = 19, /** * An embedded BSON document - * - * A BSON document embedded within a single BINARY column. + * + * A BSON document embedded within a single BYTE_ARRAY column. */ BSON = 20, /** * An interval of time - * + * * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 * This data is composed of three separate little endian unsigned * integers. Each stores a component of a duration of time. The first @@ -192,11 +192,11 @@ std::string to_string(const ConvertedType::type& val); struct FieldRepetitionType { enum type { /** - * This field is required (can not be null) and each record has exactly 1 value. + * This field is required (can not be null) and each row has exactly 1 value. */ REQUIRED = 0, /** - * The field is optional (can be null) and each record has 0 or 1 values. + * The field is optional (can be null) and each row has 0 or 1 values. 
*/ OPTIONAL = 1, /** @@ -212,6 +212,25 @@ std::ostream& operator<<(std::ostream& out, const FieldRepetitionType::type& val std::string to_string(const FieldRepetitionType::type& val); +/** + * Edge interpolation algorithm for Geography logical type + */ +struct EdgeInterpolationAlgorithm { + enum type { + SPHERICAL = 0, + VINCENTY = 1, + THOMAS = 2, + ANDOYER = 3, + KARNEY = 4 + }; +}; + +extern const std::map _EdgeInterpolationAlgorithm_VALUES_TO_NAMES; + +std::ostream& operator<<(std::ostream& out, const EdgeInterpolationAlgorithm::type& val); + +std::string to_string(const EdgeInterpolationAlgorithm::type& val); + /** * Encodings supported by Parquet. Not all encodings are valid for all types. These * enums are also used to specify the encoding of definition and repetition levels. @@ -267,12 +286,15 @@ struct Encoding { */ RLE_DICTIONARY = 8, /** - * Encoding for floating-point data. + * Encoding for fixed-width data (FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY). * K byte-streams are created where K is the size in bytes of the data type. - * The individual bytes of an FP value are scattered to the corresponding stream and + * The individual bytes of a value are scattered to the corresponding stream and * the streams are concatenated. * This itself does not reduce the size of the data but can lead to better compression * afterwards. + * + * Added in 2.8 for FLOAT and DOUBLE. + * Support for INT32, INT64 and FIXED_LEN_BYTE_ARRAY added in 2.11. */ BYTE_STREAM_SPLIT = 9 }; @@ -286,11 +308,11 @@ std::string to_string(const Encoding::type& val); /** * Supported compression algorithms. - * + * * Codecs added in format version X.Y can be read by readers based on X.Y and later. * Codec support may vary between readers based on the format version and * libraries available at runtime. - * + * * See Compression.md for a detailed specification of these algorithms. 
*/ struct CompressionCodec { @@ -347,6 +369,10 @@ std::string to_string(const BoundaryOrder::type& val); class SizeStatistics; +class BoundingBox; + +class GeospatialStatistics; + class Statistics; class StringType; @@ -385,6 +411,12 @@ class JsonType; class BsonType; +class VariantType; + +class GeometryType; + +class GeographyType; + class LogicalType; class SchemaElement; @@ -473,9 +505,7 @@ class SizeStatistics { SizeStatistics(SizeStatistics&&) noexcept; SizeStatistics& operator=(const SizeStatistics&); SizeStatistics& operator=(SizeStatistics&&) noexcept; - SizeStatistics() noexcept - : unencoded_byte_array_data_bytes(0) { - } + SizeStatistics() noexcept; virtual ~SizeStatistics() noexcept; /** @@ -487,11 +517,11 @@ class SizeStatistics { * schema information multiplied by the number of non-null and null values. * The number of null/non-null values can be inferred from the histograms * below. - * + * * For example, if a column chunk is dictionary-encoded with dictionary * ["a", "bc", "cde"], and a data page contains the indices [0, 0, 1, 2], * then this value for that data page should be 7 (1 + 1 + 2 + 3). - * + * * This field should only be set for types that use BYTE_ARRAY as their * physical type. */ @@ -501,18 +531,18 @@ class SizeStatistics { * repetition (i.e. size=max repetition_level+1) where each element * represents the number of times the repetition level was observed in the * data. - * + * * This field may be omitted if max_repetition_level is 0 without loss * of information. - * + * */ std::vector repetition_level_histogram; /** * Same as repetition_level_histogram except for definition levels. - * + * * This field may be omitted if max_definition_level is 0 or 1 without * loss of information. 
- * + * */ std::vector definition_level_histogram; @@ -524,22 +554,7 @@ class SizeStatistics { void __set_definition_level_histogram(const std::vector & val); - bool operator == (const SizeStatistics & rhs) const - { - if (__isset.unencoded_byte_array_data_bytes != rhs.__isset.unencoded_byte_array_data_bytes) - return false; - else if (__isset.unencoded_byte_array_data_bytes && !(unencoded_byte_array_data_bytes == rhs.unencoded_byte_array_data_bytes)) - return false; - if (__isset.repetition_level_histogram != rhs.__isset.repetition_level_histogram) - return false; - else if (__isset.repetition_level_histogram && !(repetition_level_histogram == rhs.repetition_level_histogram)) - return false; - if (__isset.definition_level_histogram != rhs.__isset.definition_level_histogram) - return false; - else if (__isset.definition_level_histogram && !(definition_level_histogram == rhs.definition_level_histogram)) - return false; - return true; - } + bool operator == (const SizeStatistics & rhs) const; bool operator != (const SizeStatistics &rhs) const { return !(*this == rhs); } @@ -558,6 +573,127 @@ void swap(SizeStatistics &a, SizeStatistics &b); std::ostream& operator<<(std::ostream& out, const SizeStatistics& obj); +typedef struct _BoundingBox__isset { + _BoundingBox__isset() : zmin(false), zmax(false), mmin(false), mmax(false) {} + bool zmin :1; + bool zmax :1; + bool mmin :1; + bool mmax :1; +} _BoundingBox__isset; + +/** + * Bounding box for GEOMETRY or GEOGRAPHY type in the representation of min/max + * value pair of coordinates from each axis. 
+ */ +class BoundingBox { + public: + + BoundingBox(const BoundingBox&) noexcept; + BoundingBox(BoundingBox&&) noexcept; + BoundingBox& operator=(const BoundingBox&) noexcept; + BoundingBox& operator=(BoundingBox&&) noexcept; + BoundingBox() noexcept; + + virtual ~BoundingBox() noexcept; + double xmin; + double xmax; + double ymin; + double ymax; + double zmin; + double zmax; + double mmin; + double mmax; + + _BoundingBox__isset __isset; + + void __set_xmin(const double val); + + void __set_xmax(const double val); + + void __set_ymin(const double val); + + void __set_ymax(const double val); + + void __set_zmin(const double val); + + void __set_zmax(const double val); + + void __set_mmin(const double val); + + void __set_mmax(const double val); + + bool operator == (const BoundingBox & rhs) const; + bool operator != (const BoundingBox &rhs) const { + return !(*this == rhs); + } + + bool operator < (const BoundingBox & ) const; + + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; + + virtual void printTo(std::ostream& out) const; +}; + +void swap(BoundingBox &a, BoundingBox &b); + +std::ostream& operator<<(std::ostream& out, const BoundingBox& obj); + +typedef struct _GeospatialStatistics__isset { + _GeospatialStatistics__isset() : bbox(false), geospatial_types(false) {} + bool bbox :1; + bool geospatial_types :1; +} _GeospatialStatistics__isset; + +/** + * Statistics specific to Geometry and Geography logical types + */ +class GeospatialStatistics { + public: + + GeospatialStatistics(const GeospatialStatistics&); + GeospatialStatistics(GeospatialStatistics&&) noexcept; + GeospatialStatistics& operator=(const GeospatialStatistics&); + GeospatialStatistics& operator=(GeospatialStatistics&&) noexcept; + GeospatialStatistics() noexcept; + + virtual ~GeospatialStatistics() noexcept; + /** + * A bounding box of geospatial instances + */ + BoundingBox bbox; + /** + * Geospatial type codes of all instances, or an empty list if 
not known + */ + std::vector geospatial_types; + + _GeospatialStatistics__isset __isset; + + void __set_bbox(const BoundingBox& val); + + void __set_geospatial_types(const std::vector & val); + + bool operator == (const GeospatialStatistics & rhs) const; + bool operator != (const GeospatialStatistics &rhs) const { + return !(*this == rhs); + } + + bool operator < (const GeospatialStatistics & ) const; + + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; + + virtual void printTo(std::ostream& out) const; +}; + +void swap(GeospatialStatistics &a, GeospatialStatistics &b); + +std::ostream& operator<<(std::ostream& out, const GeospatialStatistics& obj); + typedef struct _Statistics__isset { _Statistics__isset() : max(false), min(false), null_count(false), distinct_count(false), max_value(false), min_value(false), is_max_value_exact(false), is_min_value_exact(false) {} bool max :1; @@ -581,35 +717,31 @@ class Statistics { Statistics(Statistics&&) noexcept; Statistics& operator=(const Statistics&); Statistics& operator=(Statistics&&) noexcept; - Statistics() noexcept - : max(), - min(), - null_count(0), - distinct_count(0), - max_value(), - min_value(), - is_max_value_exact(0), - is_min_value_exact(0) { - } + Statistics() noexcept; virtual ~Statistics() noexcept; /** * DEPRECATED: min and max value of the column. Use min_value and max_value. - * + * * Values are encoded using PLAIN encoding, except that variable-length byte * arrays do not include a length prefix. - * + * * These fields encode min and max values determined by signed comparison * only. New files should use the correct order for a column's logical type * and store the values in the min_value and max_value fields. - * + * * To support older readers, these may be set when the column order is * signed. */ std::string max; std::string min; /** - * count of null value in the column + * Count of null values in the column. 
+ * + * Writers SHOULD always write this field even if it is zero (i.e. no null value) + * or the column is not nullable. + * Readers MUST distinguish between null_count not being present and null_count == 0. + * If null_count is not present, readers MUST NOT assume null_count == 0. */ int64_t null_count; /** @@ -618,13 +750,13 @@ class Statistics { int64_t distinct_count; /** * Lower and upper bound values for the column, determined by its ColumnOrder. - * + * * These may be the actual minimum and maximum values found on a page or column * chunk, but can also be (more compact) values that do not exist on a page or * column chunk. For example, instead of storing "Blart Versenwald III", a writer * may set min_value="B", max_value="C". Such more compact values must still be * valid values within the column's logical type. - * + * * Values are encoded using PLAIN encoding, except that variable-length byte * arrays do not include a length prefix. */ @@ -657,42 +789,7 @@ class Statistics { void __set_is_min_value_exact(const bool val); - bool operator == (const Statistics & rhs) const - { - if (__isset.max != rhs.__isset.max) - return false; - else if (__isset.max && !(max == rhs.max)) - return false; - if (__isset.min != rhs.__isset.min) - return false; - else if (__isset.min && !(min == rhs.min)) - return false; - if (__isset.null_count != rhs.__isset.null_count) - return false; - else if (__isset.null_count && !(null_count == rhs.null_count)) - return false; - if (__isset.distinct_count != rhs.__isset.distinct_count) - return false; - else if (__isset.distinct_count && !(distinct_count == rhs.distinct_count)) - return false; - if (__isset.max_value != rhs.__isset.max_value) - return false; - else if (__isset.max_value && !(max_value == rhs.max_value)) - return false; - if (__isset.min_value != rhs.__isset.min_value) - return false; - else if (__isset.min_value && !(min_value == rhs.min_value)) - return false; - if (__isset.is_max_value_exact != 
rhs.__isset.is_max_value_exact) - return false; - else if (__isset.is_max_value_exact && !(is_max_value_exact == rhs.is_max_value_exact)) - return false; - if (__isset.is_min_value_exact != rhs.__isset.is_min_value_exact) - return false; - else if (__isset.is_min_value_exact && !(is_min_value_exact == rhs.is_min_value_exact)) - return false; - return true; - } + bool operator == (const Statistics & rhs) const; bool operator != (const Statistics &rhs) const { return !(*this == rhs); } @@ -722,15 +819,11 @@ class StringType { StringType(StringType&&) noexcept; StringType& operator=(const StringType&) noexcept; StringType& operator=(StringType&&) noexcept; - StringType() noexcept { - } + StringType() noexcept; virtual ~StringType() noexcept; - bool operator == (const StringType & /* rhs */) const - { - return true; - } + bool operator == (const StringType & /* rhs */) const; bool operator != (const StringType &rhs) const { return !(*this == rhs); } @@ -757,15 +850,11 @@ class UUIDType { UUIDType(UUIDType&&) noexcept; UUIDType& operator=(const UUIDType&) noexcept; UUIDType& operator=(UUIDType&&) noexcept; - UUIDType() noexcept { - } + UUIDType() noexcept; virtual ~UUIDType() noexcept; - bool operator == (const UUIDType & /* rhs */) const - { - return true; - } + bool operator == (const UUIDType & /* rhs */) const; bool operator != (const UUIDType &rhs) const { return !(*this == rhs); } @@ -792,15 +881,11 @@ class MapType { MapType(MapType&&) noexcept; MapType& operator=(const MapType&) noexcept; MapType& operator=(MapType&&) noexcept; - MapType() noexcept { - } + MapType() noexcept; virtual ~MapType() noexcept; - bool operator == (const MapType & /* rhs */) const - { - return true; - } + bool operator == (const MapType & /* rhs */) const; bool operator != (const MapType &rhs) const { return !(*this == rhs); } @@ -827,15 +912,11 @@ class ListType { ListType(ListType&&) noexcept; ListType& operator=(const ListType&) noexcept; ListType& operator=(ListType&&) noexcept; - 
ListType() noexcept { - } + ListType() noexcept; virtual ~ListType() noexcept; - bool operator == (const ListType & /* rhs */) const - { - return true; - } + bool operator == (const ListType & /* rhs */) const; bool operator != (const ListType &rhs) const { return !(*this == rhs); } @@ -862,15 +943,11 @@ class EnumType { EnumType(EnumType&&) noexcept; EnumType& operator=(const EnumType&) noexcept; EnumType& operator=(EnumType&&) noexcept; - EnumType() noexcept { - } + EnumType() noexcept; virtual ~EnumType() noexcept; - bool operator == (const EnumType & /* rhs */) const - { - return true; - } + bool operator == (const EnumType & /* rhs */) const; bool operator != (const EnumType &rhs) const { return !(*this == rhs); } @@ -897,15 +974,11 @@ class DateType { DateType(DateType&&) noexcept; DateType& operator=(const DateType&) noexcept; DateType& operator=(DateType&&) noexcept; - DateType() noexcept { - } + DateType() noexcept; virtual ~DateType() noexcept; - bool operator == (const DateType & /* rhs */) const - { - return true; - } + bool operator == (const DateType & /* rhs */) const; bool operator != (const DateType &rhs) const { return !(*this == rhs); } @@ -932,15 +1005,11 @@ class Float16Type { Float16Type(Float16Type&&) noexcept; Float16Type& operator=(const Float16Type&) noexcept; Float16Type& operator=(Float16Type&&) noexcept; - Float16Type() noexcept { - } + Float16Type() noexcept; virtual ~Float16Type() noexcept; - bool operator == (const Float16Type & /* rhs */) const - { - return true; - } + bool operator == (const Float16Type & /* rhs */) const; bool operator != (const Float16Type &rhs) const { return !(*this == rhs); } @@ -962,7 +1031,7 @@ std::ostream& operator<<(std::ostream& out, const Float16Type& obj); /** * Logical type to annotate a column that is always null. - * + * * Sometimes when discovering the schema of existing data, values are always * null and the physical type can't be determined. 
This annotation signals * the case where the physical type was guessed from all null values. @@ -974,15 +1043,11 @@ class NullType { NullType(NullType&&) noexcept; NullType& operator=(const NullType&) noexcept; NullType& operator=(NullType&&) noexcept; - NullType() noexcept { - } + NullType() noexcept; virtual ~NullType() noexcept; - bool operator == (const NullType & /* rhs */) const - { - return true; - } + bool operator == (const NullType & /* rhs */) const; bool operator != (const NullType &rhs) const { return !(*this == rhs); } @@ -1004,14 +1069,14 @@ std::ostream& operator<<(std::ostream& out, const NullType& obj); /** * Decimal logical type annotation - * + * * Scale must be zero or a positive integer less than or equal to the precision. * Precision must be a non-zero positive integer. - * + * * To maintain forward-compatibility in v1, implementations using this logical * type must also set scale and precision on the annotated SchemaElement. - * - * Allowed for physical types: INT32, INT64, FIXED, and BINARY + * + * Allowed for physical types: INT32, INT64, FIXED_LEN_BYTE_ARRAY, and BYTE_ARRAY. 
*/ class DecimalType { public: @@ -1020,10 +1085,7 @@ class DecimalType { DecimalType(DecimalType&&) noexcept; DecimalType& operator=(const DecimalType&) noexcept; DecimalType& operator=(DecimalType&&) noexcept; - DecimalType() noexcept - : scale(0), - precision(0) { - } + DecimalType() noexcept; virtual ~DecimalType() noexcept; int32_t scale; @@ -1033,14 +1095,7 @@ class DecimalType { void __set_precision(const int32_t val); - bool operator == (const DecimalType & rhs) const - { - if (!(scale == rhs.scale)) - return false; - if (!(precision == rhs.precision)) - return false; - return true; - } + bool operator == (const DecimalType & rhs) const; bool operator != (const DecimalType &rhs) const { return !(*this == rhs); } @@ -1070,15 +1125,11 @@ class MilliSeconds { MilliSeconds(MilliSeconds&&) noexcept; MilliSeconds& operator=(const MilliSeconds&) noexcept; MilliSeconds& operator=(MilliSeconds&&) noexcept; - MilliSeconds() noexcept { - } + MilliSeconds() noexcept; virtual ~MilliSeconds() noexcept; - bool operator == (const MilliSeconds & /* rhs */) const - { - return true; - } + bool operator == (const MilliSeconds & /* rhs */) const; bool operator != (const MilliSeconds &rhs) const { return !(*this == rhs); } @@ -1105,15 +1156,11 @@ class MicroSeconds { MicroSeconds(MicroSeconds&&) noexcept; MicroSeconds& operator=(const MicroSeconds&) noexcept; MicroSeconds& operator=(MicroSeconds&&) noexcept; - MicroSeconds() noexcept { - } + MicroSeconds() noexcept; virtual ~MicroSeconds() noexcept; - bool operator == (const MicroSeconds & /* rhs */) const - { - return true; - } + bool operator == (const MicroSeconds & /* rhs */) const; bool operator != (const MicroSeconds &rhs) const { return !(*this == rhs); } @@ -1140,15 +1187,11 @@ class NanoSeconds { NanoSeconds(NanoSeconds&&) noexcept; NanoSeconds& operator=(const NanoSeconds&) noexcept; NanoSeconds& operator=(NanoSeconds&&) noexcept; - NanoSeconds() noexcept { - } + NanoSeconds() noexcept; virtual ~NanoSeconds() noexcept; 
- bool operator == (const NanoSeconds & /* rhs */) const - { - return true; - } + bool operator == (const NanoSeconds & /* rhs */) const; bool operator != (const NanoSeconds &rhs) const { return !(*this == rhs); } @@ -1181,8 +1224,7 @@ class TimeUnit { TimeUnit(TimeUnit&&) noexcept; TimeUnit& operator=(const TimeUnit&) noexcept; TimeUnit& operator=(TimeUnit&&) noexcept; - TimeUnit() noexcept { - } + TimeUnit() noexcept; virtual ~TimeUnit() noexcept; MilliSeconds MILLIS; @@ -1197,22 +1239,7 @@ class TimeUnit { void __set_NANOS(const NanoSeconds& val); - bool operator == (const TimeUnit & rhs) const - { - if (__isset.MILLIS != rhs.__isset.MILLIS) - return false; - else if (__isset.MILLIS && !(MILLIS == rhs.MILLIS)) - return false; - if (__isset.MICROS != rhs.__isset.MICROS) - return false; - else if (__isset.MICROS && !(MICROS == rhs.MICROS)) - return false; - if (__isset.NANOS != rhs.__isset.NANOS) - return false; - else if (__isset.NANOS && !(NANOS == rhs.NANOS)) - return false; - return true; - } + bool operator == (const TimeUnit & rhs) const; bool operator != (const TimeUnit &rhs) const { return !(*this == rhs); } @@ -1234,7 +1261,7 @@ std::ostream& operator<<(std::ostream& out, const TimeUnit& obj); /** * Timestamp logical type annotation - * + * * Allowed for physical types: INT64 */ class TimestampType { @@ -1244,9 +1271,7 @@ class TimestampType { TimestampType(TimestampType&&) noexcept; TimestampType& operator=(const TimestampType&) noexcept; TimestampType& operator=(TimestampType&&) noexcept; - TimestampType() noexcept - : isAdjustedToUTC(0) { - } + TimestampType() noexcept; virtual ~TimestampType() noexcept; bool isAdjustedToUTC; @@ -1256,14 +1281,7 @@ class TimestampType { void __set_unit(const TimeUnit& val); - bool operator == (const TimestampType & rhs) const - { - if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) - return false; - if (!(unit == rhs.unit)) - return false; - return true; - } + bool operator == (const TimestampType & rhs) const; bool 
operator != (const TimestampType &rhs) const { return !(*this == rhs); } @@ -1285,7 +1303,7 @@ std::ostream& operator<<(std::ostream& out, const TimestampType& obj); /** * Time logical type annotation - * + * * Allowed for physical types: INT32 (millis), INT64 (micros, nanos) */ class TimeType { @@ -1295,9 +1313,7 @@ class TimeType { TimeType(TimeType&&) noexcept; TimeType& operator=(const TimeType&) noexcept; TimeType& operator=(TimeType&&) noexcept; - TimeType() noexcept - : isAdjustedToUTC(0) { - } + TimeType() noexcept; virtual ~TimeType() noexcept; bool isAdjustedToUTC; @@ -1307,14 +1323,7 @@ class TimeType { void __set_unit(const TimeUnit& val); - bool operator == (const TimeType & rhs) const - { - if (!(isAdjustedToUTC == rhs.isAdjustedToUTC)) - return false; - if (!(unit == rhs.unit)) - return false; - return true; - } + bool operator == (const TimeType & rhs) const; bool operator != (const TimeType &rhs) const { return !(*this == rhs); } @@ -1336,9 +1345,9 @@ std::ostream& operator<<(std::ostream& out, const TimeType& obj); /** * Integer logical type annotation - * + * * bitWidth must be 8, 16, 32, or 64. 
- * + * * Allowed for physical types: INT32, INT64 */ class IntType { @@ -1348,10 +1357,7 @@ class IntType { IntType(IntType&&) noexcept; IntType& operator=(const IntType&) noexcept; IntType& operator=(IntType&&) noexcept; - IntType() noexcept - : bitWidth(0), - isSigned(0) { - } + IntType() noexcept; virtual ~IntType() noexcept; int8_t bitWidth; @@ -1361,14 +1367,7 @@ class IntType { void __set_isSigned(const bool val); - bool operator == (const IntType & rhs) const - { - if (!(bitWidth == rhs.bitWidth)) - return false; - if (!(isSigned == rhs.isSigned)) - return false; - return true; - } + bool operator == (const IntType & rhs) const; bool operator != (const IntType &rhs) const { return !(*this == rhs); } @@ -1390,8 +1389,8 @@ std::ostream& operator<<(std::ostream& out, const IntType& obj); /** * Embedded JSON logical type annotation - * - * Allowed for physical types: BINARY + * + * Allowed for physical types: BYTE_ARRAY */ class JsonType { public: @@ -1400,15 +1399,11 @@ class JsonType { JsonType(JsonType&&) noexcept; JsonType& operator=(const JsonType&) noexcept; JsonType& operator=(JsonType&&) noexcept; - JsonType() noexcept { - } + JsonType() noexcept; virtual ~JsonType() noexcept; - bool operator == (const JsonType & /* rhs */) const - { - return true; - } + bool operator == (const JsonType & /* rhs */) const; bool operator != (const JsonType &rhs) const { return !(*this == rhs); } @@ -1430,8 +1425,8 @@ std::ostream& operator<<(std::ostream& out, const JsonType& obj); /** * Embedded BSON logical type annotation - * - * Allowed for physical types: BINARY + * + * Allowed for physical types: BYTE_ARRAY */ class BsonType { public: @@ -1440,15 +1435,11 @@ class BsonType { BsonType(BsonType&&) noexcept; BsonType& operator=(const BsonType&) noexcept; BsonType& operator=(BsonType&&) noexcept; - BsonType() noexcept { - } + BsonType() noexcept; virtual ~BsonType() noexcept; - bool operator == (const BsonType & /* rhs */) const - { - return true; - } + bool operator 
== (const BsonType & /* rhs */) const; bool operator != (const BsonType &rhs) const { return !(*this == rhs); } @@ -1467,8 +1458,161 @@ void swap(BsonType &a, BsonType &b); std::ostream& operator<<(std::ostream& out, const BsonType& obj); + +/** + * Embedded Variant logical type annotation + */ +class VariantType { + public: + + VariantType(const VariantType&) noexcept; + VariantType(VariantType&&) noexcept; + VariantType& operator=(const VariantType&) noexcept; + VariantType& operator=(VariantType&&) noexcept; + VariantType() noexcept; + + virtual ~VariantType() noexcept; + + bool operator == (const VariantType & /* rhs */) const; + bool operator != (const VariantType &rhs) const { + return !(*this == rhs); + } + + bool operator < (const VariantType & ) const; + + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; + + virtual void printTo(std::ostream& out) const; +}; + +void swap(VariantType &a, VariantType &b); + +std::ostream& operator<<(std::ostream& out, const VariantType& obj); + +typedef struct _GeometryType__isset { + _GeometryType__isset() : crs(false) {} + bool crs :1; +} _GeometryType__isset; + +/** + * Embedded Geometry logical type annotation + * + * Geospatial features in the Well-Known Binary (WKB) format and edges interpolation + * is always linear/planar. + * + * A custom CRS can be set by the crs field. If unset, it defaults to "OGC:CRS84", + * which means that the geometries must be stored in longitude, latitude based on + * the WGS84 datum. + * + * Allowed for physical type: BYTE_ARRAY. + * + * See Geospatial.md for details. 
+ */ +class GeometryType { + public: + + GeometryType(const GeometryType&); + GeometryType(GeometryType&&) noexcept; + GeometryType& operator=(const GeometryType&); + GeometryType& operator=(GeometryType&&) noexcept; + GeometryType() noexcept; + + virtual ~GeometryType() noexcept; + std::string crs; + + _GeometryType__isset __isset; + + void __set_crs(const std::string& val); + + bool operator == (const GeometryType & rhs) const; + bool operator != (const GeometryType &rhs) const { + return !(*this == rhs); + } + + bool operator < (const GeometryType & ) const; + + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; + + virtual void printTo(std::ostream& out) const; +}; + +void swap(GeometryType &a, GeometryType &b); + +std::ostream& operator<<(std::ostream& out, const GeometryType& obj); + +typedef struct _GeographyType__isset { + _GeographyType__isset() : crs(false), algorithm(false) {} + bool crs :1; + bool algorithm :1; +} _GeographyType__isset; + +/** + * Embedded Geography logical type annotation + * + * Geospatial features in the WKB format with an explicit (non-linear/non-planar) + * edges interpolation algorithm. + * + * A custom geographic CRS can be set by the crs field, where longitudes are + * bound by [-180, 180] and latitudes are bound by [-90, 90]. If unset, the CRS + * defaults to "OGC:CRS84". + * + * An optional algorithm can be set to correctly interpret edges interpolation + * of the geometries. If unset, the algorithm defaults to SPHERICAL. + * + * Allowed for physical type: BYTE_ARRAY. + * + * See Geospatial.md for details. 
+ */ +class GeographyType { + public: + + GeographyType(const GeographyType&); + GeographyType(GeographyType&&) noexcept; + GeographyType& operator=(const GeographyType&); + GeographyType& operator=(GeographyType&&) noexcept; + GeographyType() noexcept; + + virtual ~GeographyType() noexcept; + std::string crs; + /** + * + * @see EdgeInterpolationAlgorithm + */ + EdgeInterpolationAlgorithm::type algorithm; + + _GeographyType__isset __isset; + + void __set_crs(const std::string& val); + + void __set_algorithm(const EdgeInterpolationAlgorithm::type val); + + bool operator == (const GeographyType & rhs) const; + bool operator != (const GeographyType &rhs) const { + return !(*this == rhs); + } + + bool operator < (const GeographyType & ) const; + + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; + + virtual void printTo(std::ostream& out) const; +}; + +void swap(GeographyType &a, GeographyType &b); + +std::ostream& operator<<(std::ostream& out, const GeographyType& obj); + typedef struct _LogicalType__isset { - _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false), FLOAT16(false) {} + _LogicalType__isset() : STRING(false), MAP(false), LIST(false), ENUM(false), DECIMAL(false), DATE(false), TIME(false), TIMESTAMP(false), INTEGER(false), UNKNOWN(false), JSON(false), BSON(false), UUID(false), FLOAT16(false), VARIANT(false), GEOMETRY(false), GEOGRAPHY(false) {} bool STRING :1; bool MAP :1; bool LIST :1; @@ -1483,11 +1627,14 @@ typedef struct _LogicalType__isset { bool BSON :1; bool UUID :1; bool FLOAT16 :1; + bool VARIANT :1; + bool GEOMETRY :1; + bool GEOGRAPHY :1; } _LogicalType__isset; /** * LogicalType annotations to replace ConvertedType. 
- * + * * To maintain compatibility, implementations using LogicalType for a * SchemaElement must also set the corresponding ConvertedType (if any) * from the following table. @@ -1495,12 +1642,11 @@ typedef struct _LogicalType__isset { class LogicalType { public: - LogicalType(const LogicalType&) noexcept; + LogicalType(const LogicalType&); LogicalType(LogicalType&&) noexcept; - LogicalType& operator=(const LogicalType&) noexcept; + LogicalType& operator=(const LogicalType&); LogicalType& operator=(LogicalType&&) noexcept; - LogicalType() noexcept { - } + LogicalType() noexcept; virtual ~LogicalType() noexcept; StringType STRING; @@ -1517,6 +1663,9 @@ class LogicalType { BsonType BSON; UUIDType UUID; Float16Type FLOAT16; + VariantType VARIANT; + GeometryType GEOMETRY; + GeographyType GEOGRAPHY; _LogicalType__isset __isset; @@ -1548,66 +1697,13 @@ class LogicalType { void __set_FLOAT16(const Float16Type& val); - bool operator == (const LogicalType & rhs) const - { - if (__isset.STRING != rhs.__isset.STRING) - return false; - else if (__isset.STRING && !(STRING == rhs.STRING)) - return false; - if (__isset.MAP != rhs.__isset.MAP) - return false; - else if (__isset.MAP && !(MAP == rhs.MAP)) - return false; - if (__isset.LIST != rhs.__isset.LIST) - return false; - else if (__isset.LIST && !(LIST == rhs.LIST)) - return false; - if (__isset.ENUM != rhs.__isset.ENUM) - return false; - else if (__isset.ENUM && !(ENUM == rhs.ENUM)) - return false; - if (__isset.DECIMAL != rhs.__isset.DECIMAL) - return false; - else if (__isset.DECIMAL && !(DECIMAL == rhs.DECIMAL)) - return false; - if (__isset.DATE != rhs.__isset.DATE) - return false; - else if (__isset.DATE && !(DATE == rhs.DATE)) - return false; - if (__isset.TIME != rhs.__isset.TIME) - return false; - else if (__isset.TIME && !(TIME == rhs.TIME)) - return false; - if (__isset.TIMESTAMP != rhs.__isset.TIMESTAMP) - return false; - else if (__isset.TIMESTAMP && !(TIMESTAMP == rhs.TIMESTAMP)) - return false; - if 
(__isset.INTEGER != rhs.__isset.INTEGER) - return false; - else if (__isset.INTEGER && !(INTEGER == rhs.INTEGER)) - return false; - if (__isset.UNKNOWN != rhs.__isset.UNKNOWN) - return false; - else if (__isset.UNKNOWN && !(UNKNOWN == rhs.UNKNOWN)) - return false; - if (__isset.JSON != rhs.__isset.JSON) - return false; - else if (__isset.JSON && !(JSON == rhs.JSON)) - return false; - if (__isset.BSON != rhs.__isset.BSON) - return false; - else if (__isset.BSON && !(BSON == rhs.BSON)) - return false; - if (__isset.UUID != rhs.__isset.UUID) - return false; - else if (__isset.UUID && !(UUID == rhs.UUID)) - return false; - if (__isset.FLOAT16 != rhs.__isset.FLOAT16) - return false; - else if (__isset.FLOAT16 && !(FLOAT16 == rhs.FLOAT16)) - return false; - return true; - } + void __set_VARIANT(const VariantType& val); + + void __set_GEOMETRY(const GeometryType& val); + + void __set_GEOGRAPHY(const GeographyType& val); + + bool operator == (const LogicalType & rhs) const; bool operator != (const LogicalType &rhs) const { return !(*this == rhs); } @@ -1652,22 +1748,12 @@ class SchemaElement { SchemaElement(SchemaElement&&) noexcept; SchemaElement& operator=(const SchemaElement&); SchemaElement& operator=(SchemaElement&&) noexcept; - SchemaElement() noexcept - : type(static_cast(0)), - type_length(0), - repetition_type(static_cast(0)), - name(), - num_children(0), - converted_type(static_cast(0)), - scale(0), - precision(0), - field_id(0) { - } + SchemaElement() noexcept; virtual ~SchemaElement() noexcept; /** * Data type for this field. Not set if the current element is a non-leaf node - * + * * @see Type */ Type::type type; @@ -1681,7 +1767,7 @@ class SchemaElement { /** * repetition of the field. The root of the schema does not have a repetition_type. 
* All other nodes must have one - * + * * @see FieldRepetitionType */ FieldRepetitionType::type repetition_type; @@ -1699,16 +1785,16 @@ class SchemaElement { /** * DEPRECATED: When the schema is the result of a conversion from another model. * Used to record the original type to help with cross conversion. - * + * * This is superseded by logicalType. - * + * * @see ConvertedType */ ConvertedType::type converted_type; /** * DEPRECATED: Used when this column contains decimal data. * See the DECIMAL converted type for more details. - * + * * This is superseded by using the DecimalType annotation in logicalType. */ int32_t scale; @@ -1720,7 +1806,7 @@ class SchemaElement { int32_t field_id; /** * The logical type of this SchemaElement - * + * * LogicalType replaces ConvertedType, but ConvertedType is still required * for some logical types to ensure forward-compatibility in format v1. */ @@ -1748,48 +1834,7 @@ class SchemaElement { void __set_logicalType(const LogicalType& val); - bool operator == (const SchemaElement & rhs) const - { - if (__isset.type != rhs.__isset.type) - return false; - else if (__isset.type && !(type == rhs.type)) - return false; - if (__isset.type_length != rhs.__isset.type_length) - return false; - else if (__isset.type_length && !(type_length == rhs.type_length)) - return false; - if (__isset.repetition_type != rhs.__isset.repetition_type) - return false; - else if (__isset.repetition_type && !(repetition_type == rhs.repetition_type)) - return false; - if (!(name == rhs.name)) - return false; - if (__isset.num_children != rhs.__isset.num_children) - return false; - else if (__isset.num_children && !(num_children == rhs.num_children)) - return false; - if (__isset.converted_type != rhs.__isset.converted_type) - return false; - else if (__isset.converted_type && !(converted_type == rhs.converted_type)) - return false; - if (__isset.scale != rhs.__isset.scale) - return false; - else if (__isset.scale && !(scale == rhs.scale)) - return false; - 
if (__isset.precision != rhs.__isset.precision) - return false; - else if (__isset.precision && !(precision == rhs.precision)) - return false; - if (__isset.field_id != rhs.__isset.field_id) - return false; - else if (__isset.field_id && !(field_id == rhs.field_id)) - return false; - if (__isset.logicalType != rhs.__isset.logicalType) - return false; - else if (__isset.logicalType && !(logicalType == rhs.logicalType)) - return false; - return true; - } + bool operator == (const SchemaElement & rhs) const; bool operator != (const SchemaElement &rhs) const { return !(*this == rhs); } @@ -1823,33 +1868,33 @@ class DataPageHeader { DataPageHeader(DataPageHeader&&) noexcept; DataPageHeader& operator=(const DataPageHeader&); DataPageHeader& operator=(DataPageHeader&&) noexcept; - DataPageHeader() noexcept - : num_values(0), - encoding(static_cast(0)), - definition_level_encoding(static_cast(0)), - repetition_level_encoding(static_cast(0)) { - } + DataPageHeader() noexcept; virtual ~DataPageHeader() noexcept; /** - * Number of values, including NULLs, in this data page. * + * Number of values, including NULLs, in this data page. + * + * If a OffsetIndex is present, a page must begin at a row + * boundary (repetition_level = 0). Otherwise, pages may begin + * within a row (repetition_level > 0). 
+ * */ int32_t num_values; /** * Encoding used for this data page * - * + * * @see Encoding */ Encoding::type encoding; /** * Encoding used for definition levels * - * + * * @see Encoding */ Encoding::type definition_level_encoding; /** * Encoding used for repetition levels * - * + * * @see Encoding */ Encoding::type repetition_level_encoding; @@ -1870,22 +1915,7 @@ class DataPageHeader { void __set_statistics(const Statistics& val); - bool operator == (const DataPageHeader & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(definition_level_encoding == rhs.definition_level_encoding)) - return false; - if (!(repetition_level_encoding == rhs.repetition_level_encoding)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; - } + bool operator == (const DataPageHeader & rhs) const; bool operator != (const DataPageHeader &rhs) const { return !(*this == rhs); } @@ -1912,15 +1942,11 @@ class IndexPageHeader { IndexPageHeader(IndexPageHeader&&) noexcept; IndexPageHeader& operator=(const IndexPageHeader&) noexcept; IndexPageHeader& operator=(IndexPageHeader&&) noexcept; - IndexPageHeader() noexcept { - } + IndexPageHeader() noexcept; virtual ~IndexPageHeader() noexcept; - bool operator == (const IndexPageHeader & /* rhs */) const - { - return true; - } + bool operator == (const IndexPageHeader & /* rhs */) const; bool operator != (const IndexPageHeader &rhs) const { return !(*this == rhs); } @@ -1948,7 +1974,7 @@ typedef struct _DictionaryPageHeader__isset { * The dictionary page must be placed at the first position of the column chunk * if it is partly or completely dictionary encoded. At most one dictionary page * can be placed in a column chunk. 
- * + * */ class DictionaryPageHeader { public: @@ -1957,11 +1983,7 @@ class DictionaryPageHeader { DictionaryPageHeader(DictionaryPageHeader&&) noexcept; DictionaryPageHeader& operator=(const DictionaryPageHeader&) noexcept; DictionaryPageHeader& operator=(DictionaryPageHeader&&) noexcept; - DictionaryPageHeader() noexcept - : num_values(0), - encoding(static_cast(0)), - is_sorted(0) { - } + DictionaryPageHeader() noexcept; virtual ~DictionaryPageHeader() noexcept; /** @@ -1970,7 +1992,7 @@ class DictionaryPageHeader { int32_t num_values; /** * Encoding using this dictionary page * - * + * * @see Encoding */ Encoding::type encoding; @@ -1987,18 +2009,7 @@ class DictionaryPageHeader { void __set_is_sorted(const bool val); - bool operator == (const DictionaryPageHeader & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (__isset.is_sorted != rhs.__isset.is_sorted) - return false; - else if (__isset.is_sorted && !(is_sorted == rhs.is_sorted)) - return false; - return true; - } + bool operator == (const DictionaryPageHeader & rhs) const; bool operator != (const DictionaryPageHeader &rhs) const { return !(*this == rhs); } @@ -2027,7 +2038,7 @@ typedef struct _DataPageHeaderV2__isset { * New page format allowing reading levels without decompressing the data * Repetition and definition levels are uncompressed * The remaining section containing the data is compressed if is_compressed is true - * + * */ class DataPageHeaderV2 { public: @@ -2036,15 +2047,7 @@ class DataPageHeaderV2 { DataPageHeaderV2(DataPageHeaderV2&&) noexcept; DataPageHeaderV2& operator=(const DataPageHeaderV2&); DataPageHeaderV2& operator=(DataPageHeaderV2&&) noexcept; - DataPageHeaderV2() noexcept - : num_values(0), - num_nulls(0), - num_rows(0), - encoding(static_cast(0)), - definition_levels_byte_length(0), - repetition_levels_byte_length(0), - is_compressed(true) { - } + DataPageHeaderV2() noexcept; virtual 
~DataPageHeaderV2() noexcept; /** @@ -2057,12 +2060,15 @@ class DataPageHeaderV2 { */ int32_t num_nulls; /** - * Number of rows in this data page. which means pages change on record boundaries (r = 0) * + * Number of rows in this data page. Every page must begin at a + * row boundary (repetition_level = 0): rows must **not** be + * split across page boundaries when using V2 data pages. + * */ int32_t num_rows; /** * Encoding used for data in this page * - * + * * @see Encoding */ Encoding::type encoding; @@ -2105,30 +2111,7 @@ class DataPageHeaderV2 { void __set_statistics(const Statistics& val); - bool operator == (const DataPageHeaderV2 & rhs) const - { - if (!(num_values == rhs.num_values)) - return false; - if (!(num_nulls == rhs.num_nulls)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(definition_levels_byte_length == rhs.definition_levels_byte_length)) - return false; - if (!(repetition_levels_byte_length == rhs.repetition_levels_byte_length)) - return false; - if (__isset.is_compressed != rhs.__isset.is_compressed) - return false; - else if (__isset.is_compressed && !(is_compressed == rhs.is_compressed)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - return true; - } + bool operator == (const DataPageHeaderV2 & rhs) const; bool operator != (const DataPageHeaderV2 &rhs) const { return !(*this == rhs); } @@ -2158,15 +2141,11 @@ class SplitBlockAlgorithm { SplitBlockAlgorithm(SplitBlockAlgorithm&&) noexcept; SplitBlockAlgorithm& operator=(const SplitBlockAlgorithm&) noexcept; SplitBlockAlgorithm& operator=(SplitBlockAlgorithm&&) noexcept; - SplitBlockAlgorithm() noexcept { - } + SplitBlockAlgorithm() noexcept; virtual ~SplitBlockAlgorithm() noexcept; - bool operator == (const SplitBlockAlgorithm & /* rhs */) const - { - return true; - } + bool operator == (const 
SplitBlockAlgorithm & /* rhs */) const; bool operator != (const SplitBlockAlgorithm &rhs) const { return !(*this == rhs); } @@ -2200,8 +2179,7 @@ class BloomFilterAlgorithm { BloomFilterAlgorithm(BloomFilterAlgorithm&&) noexcept; BloomFilterAlgorithm& operator=(const BloomFilterAlgorithm&) noexcept; BloomFilterAlgorithm& operator=(BloomFilterAlgorithm&&) noexcept; - BloomFilterAlgorithm() noexcept { - } + BloomFilterAlgorithm() noexcept; virtual ~BloomFilterAlgorithm() noexcept; /** @@ -2213,14 +2191,7 @@ class BloomFilterAlgorithm { void __set_BLOCK(const SplitBlockAlgorithm& val); - bool operator == (const BloomFilterAlgorithm & rhs) const - { - if (__isset.BLOCK != rhs.__isset.BLOCK) - return false; - else if (__isset.BLOCK && !(BLOCK == rhs.BLOCK)) - return false; - return true; - } + bool operator == (const BloomFilterAlgorithm & rhs) const; bool operator != (const BloomFilterAlgorithm &rhs) const { return !(*this == rhs); } @@ -2243,7 +2214,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj); /** * Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash * algorithm. It uses 64 bits version of xxHash. - * + * */ class XxHash { public: @@ -2252,15 +2223,11 @@ class XxHash { XxHash(XxHash&&) noexcept; XxHash& operator=(const XxHash&) noexcept; XxHash& operator=(XxHash&&) noexcept; - XxHash() noexcept { - } + XxHash() noexcept; virtual ~XxHash() noexcept; - bool operator == (const XxHash & /* rhs */) const - { - return true; - } + bool operator == (const XxHash & /* rhs */) const; bool operator != (const XxHash &rhs) const { return !(*this == rhs); } @@ -2287,7 +2254,7 @@ typedef struct _BloomFilterHash__isset { /** * The hash function used in Bloom filter. This function takes the hash of a column value * using plain encoding. 
- * + * */ class BloomFilterHash { public: @@ -2296,8 +2263,7 @@ class BloomFilterHash { BloomFilterHash(BloomFilterHash&&) noexcept; BloomFilterHash& operator=(const BloomFilterHash&) noexcept; BloomFilterHash& operator=(BloomFilterHash&&) noexcept; - BloomFilterHash() noexcept { - } + BloomFilterHash() noexcept; virtual ~BloomFilterHash() noexcept; /** @@ -2309,14 +2275,7 @@ class BloomFilterHash { void __set_XXHASH(const XxHash& val); - bool operator == (const BloomFilterHash & rhs) const - { - if (__isset.XXHASH != rhs.__isset.XXHASH) - return false; - else if (__isset.XXHASH && !(XXHASH == rhs.XXHASH)) - return false; - return true; - } + bool operator == (const BloomFilterHash & rhs) const; bool operator != (const BloomFilterHash &rhs) const { return !(*this == rhs); } @@ -2338,7 +2297,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj); /** * The compression used in the Bloom filter. - * + * */ class Uncompressed { public: @@ -2347,15 +2306,11 @@ class Uncompressed { Uncompressed(Uncompressed&&) noexcept; Uncompressed& operator=(const Uncompressed&) noexcept; Uncompressed& operator=(Uncompressed&&) noexcept; - Uncompressed() noexcept { - } + Uncompressed() noexcept; virtual ~Uncompressed() noexcept; - bool operator == (const Uncompressed & /* rhs */) const - { - return true; - } + bool operator == (const Uncompressed & /* rhs */) const; bool operator != (const Uncompressed &rhs) const { return !(*this == rhs); } @@ -2386,8 +2341,7 @@ class BloomFilterCompression { BloomFilterCompression(BloomFilterCompression&&) noexcept; BloomFilterCompression& operator=(const BloomFilterCompression&) noexcept; BloomFilterCompression& operator=(BloomFilterCompression&&) noexcept; - BloomFilterCompression() noexcept { - } + BloomFilterCompression() noexcept; virtual ~BloomFilterCompression() noexcept; Uncompressed UNCOMPRESSED; @@ -2396,14 +2350,7 @@ class BloomFilterCompression { void __set_UNCOMPRESSED(const Uncompressed& val); - bool operator == 
(const BloomFilterCompression & rhs) const - { - if (__isset.UNCOMPRESSED != rhs.__isset.UNCOMPRESSED) - return false; - else if (__isset.UNCOMPRESSED && !(UNCOMPRESSED == rhs.UNCOMPRESSED)) - return false; - return true; - } + bool operator == (const BloomFilterCompression & rhs) const; bool operator != (const BloomFilterCompression &rhs) const { return !(*this == rhs); } @@ -2426,7 +2373,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj); /** * Bloom filter header is stored at beginning of Bloom filter data of each column * and followed by its bitset. - * + * */ class BloomFilterHeader { public: @@ -2435,9 +2382,7 @@ class BloomFilterHeader { BloomFilterHeader(BloomFilterHeader&&) noexcept; BloomFilterHeader& operator=(const BloomFilterHeader&) noexcept; BloomFilterHeader& operator=(BloomFilterHeader&&) noexcept; - BloomFilterHeader() noexcept - : numBytes(0) { - } + BloomFilterHeader() noexcept; virtual ~BloomFilterHeader() noexcept; /** @@ -2465,18 +2410,7 @@ class BloomFilterHeader { void __set_compression(const BloomFilterCompression& val); - bool operator == (const BloomFilterHeader & rhs) const - { - if (!(numBytes == rhs.numBytes)) - return false; - if (!(algorithm == rhs.algorithm)) - return false; - if (!(hash == rhs.hash)) - return false; - if (!(compression == rhs.compression)) - return false; - return true; - } + bool operator == (const BloomFilterHeader & rhs) const; bool operator != (const BloomFilterHeader &rhs) const { return !(*this == rhs); } @@ -2511,17 +2445,12 @@ class PageHeader { PageHeader(PageHeader&&) noexcept; PageHeader& operator=(const PageHeader&); PageHeader& operator=(PageHeader&&) noexcept; - PageHeader() noexcept - : type(static_cast(0)), - uncompressed_page_size(0), - compressed_page_size(0), - crc(0) { - } + PageHeader() noexcept; virtual ~PageHeader() noexcept; /** * the type of the page: indicates which of the *_header fields is set * - * + * * @see PageType */ PageType::type type; @@ -2535,7 
+2464,7 @@ class PageHeader { int32_t compressed_page_size; /** * The 32-bit CRC checksum for the page, to be be calculated as follows: - * + * * - The standard CRC32 algorithm is used (with polynomial 0x04C11DB7, * the same as in e.g. GZip). * - All page types can have a CRC (v1 and v2 data pages, dictionary pages, @@ -2547,7 +2476,7 @@ class PageHeader { * encrypted). * - The CRC computation therefore takes place after any compression * and encryption steps, if any. - * + * * If enabled, this allows for disabling checksumming in HDFS if only a few * pages need to be read. */ @@ -2575,36 +2504,7 @@ class PageHeader { void __set_data_page_header_v2(const DataPageHeaderV2& val); - bool operator == (const PageHeader & rhs) const - { - if (!(type == rhs.type)) - return false; - if (!(uncompressed_page_size == rhs.uncompressed_page_size)) - return false; - if (!(compressed_page_size == rhs.compressed_page_size)) - return false; - if (__isset.crc != rhs.__isset.crc) - return false; - else if (__isset.crc && !(crc == rhs.crc)) - return false; - if (__isset.data_page_header != rhs.__isset.data_page_header) - return false; - else if (__isset.data_page_header && !(data_page_header == rhs.data_page_header)) - return false; - if (__isset.index_page_header != rhs.__isset.index_page_header) - return false; - else if (__isset.index_page_header && !(index_page_header == rhs.index_page_header)) - return false; - if (__isset.dictionary_page_header != rhs.__isset.dictionary_page_header) - return false; - else if (__isset.dictionary_page_header && !(dictionary_page_header == rhs.dictionary_page_header)) - return false; - if (__isset.data_page_header_v2 != rhs.__isset.data_page_header_v2) - return false; - else if (__isset.data_page_header_v2 && !(data_page_header_v2 == rhs.data_page_header_v2)) - return false; - return true; - } + bool operator == (const PageHeader & rhs) const; bool operator != (const PageHeader &rhs) const { return !(*this == rhs); } @@ -2638,10 +2538,7 @@ class 
KeyValue { KeyValue(KeyValue&&) noexcept; KeyValue& operator=(const KeyValue&); KeyValue& operator=(KeyValue&&) noexcept; - KeyValue() noexcept - : key(), - value() { - } + KeyValue() noexcept; virtual ~KeyValue() noexcept; std::string key; @@ -2653,16 +2550,7 @@ class KeyValue { void __set_value(const std::string& val); - bool operator == (const KeyValue & rhs) const - { - if (!(key == rhs.key)) - return false; - if (__isset.value != rhs.__isset.value) - return false; - else if (__isset.value && !(value == rhs.value)) - return false; - return true; - } + bool operator == (const KeyValue & rhs) const; bool operator != (const KeyValue &rhs) const { return !(*this == rhs); } @@ -2683,7 +2571,7 @@ std::ostream& operator<<(std::ostream& out, const KeyValue& obj); /** - * Wrapper struct to specify sort order + * Sort order within a RowGroup of a leaf column */ class SortingColumn { public: @@ -2692,15 +2580,11 @@ class SortingColumn { SortingColumn(SortingColumn&&) noexcept; SortingColumn& operator=(const SortingColumn&) noexcept; SortingColumn& operator=(SortingColumn&&) noexcept; - SortingColumn() noexcept - : column_idx(0), - descending(0), - nulls_first(0) { - } + SortingColumn() noexcept; virtual ~SortingColumn() noexcept; /** - * The column index (in this row group) * + * The ordinal position of the column (in this row group) * */ int32_t column_idx; /** @@ -2719,16 +2603,7 @@ class SortingColumn { void __set_nulls_first(const bool val); - bool operator == (const SortingColumn & rhs) const - { - if (!(column_idx == rhs.column_idx)) - return false; - if (!(descending == rhs.descending)) - return false; - if (!(nulls_first == rhs.nulls_first)) - return false; - return true; - } + bool operator == (const SortingColumn & rhs) const; bool operator != (const SortingColumn &rhs) const { return !(*this == rhs); } @@ -2758,22 +2633,18 @@ class PageEncodingStats { PageEncodingStats(PageEncodingStats&&) noexcept; PageEncodingStats& operator=(const PageEncodingStats&) 
noexcept; PageEncodingStats& operator=(PageEncodingStats&&) noexcept; - PageEncodingStats() noexcept - : page_type(static_cast(0)), - encoding(static_cast(0)), - count(0) { - } + PageEncodingStats() noexcept; virtual ~PageEncodingStats() noexcept; /** * the page type (data/dic/...) * - * + * * @see PageType */ PageType::type page_type; /** * encoding of the page * - * + * * @see Encoding */ Encoding::type encoding; @@ -2788,16 +2659,7 @@ class PageEncodingStats { void __set_count(const int32_t val); - bool operator == (const PageEncodingStats & rhs) const - { - if (!(page_type == rhs.page_type)) - return false; - if (!(encoding == rhs.encoding)) - return false; - if (!(count == rhs.count)) - return false; - return true; - } + bool operator == (const PageEncodingStats & rhs) const; bool operator != (const PageEncodingStats &rhs) const { return !(*this == rhs); } @@ -2817,7 +2679,7 @@ void swap(PageEncodingStats &a, PageEncodingStats &b); std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj); typedef struct _ColumnMetaData__isset { - _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false), encoding_stats(false), bloom_filter_offset(false), bloom_filter_length(false), size_statistics(false) {} + _ColumnMetaData__isset() : key_value_metadata(false), index_page_offset(false), dictionary_page_offset(false), statistics(false), encoding_stats(false), bloom_filter_offset(false), bloom_filter_length(false), size_statistics(false), geospatial_statistics(false) {} bool key_value_metadata :1; bool index_page_offset :1; bool dictionary_page_offset :1; @@ -2826,6 +2688,7 @@ typedef struct _ColumnMetaData__isset { bool bloom_filter_offset :1; bool bloom_filter_length :1; bool size_statistics :1; + bool geospatial_statistics :1; } _ColumnMetaData__isset; /** @@ -2838,23 +2701,12 @@ class ColumnMetaData { ColumnMetaData(ColumnMetaData&&) noexcept; ColumnMetaData& operator=(const 
ColumnMetaData&); ColumnMetaData& operator=(ColumnMetaData&&) noexcept; - ColumnMetaData() noexcept - : type(static_cast(0)), - codec(static_cast(0)), - num_values(0), - total_uncompressed_size(0), - total_compressed_size(0), - data_page_offset(0), - index_page_offset(0), - dictionary_page_offset(0), - bloom_filter_offset(0), - bloom_filter_length(0) { - } + ColumnMetaData() noexcept; virtual ~ColumnMetaData() noexcept; /** * Type of this column * - * + * * @see Type */ Type::type type; @@ -2869,7 +2721,7 @@ class ColumnMetaData { std::vector path_in_schema; /** * Compression codec * - * + * * @see CompressionCodec */ CompressionCodec::type codec; @@ -2931,6 +2783,10 @@ class ColumnMetaData { * filter pushdown. */ SizeStatistics size_statistics; + /** + * Optional statistics specific for Geometry and Geography logical types + */ + GeospatialStatistics geospatial_statistics; _ColumnMetaData__isset __isset; @@ -2966,58 +2822,9 @@ class ColumnMetaData { void __set_size_statistics(const SizeStatistics& val); - bool operator == (const ColumnMetaData & rhs) const - { - if (!(type == rhs.type)) - return false; - if (!(encodings == rhs.encodings)) - return false; - if (!(path_in_schema == rhs.path_in_schema)) - return false; - if (!(codec == rhs.codec)) - return false; - if (!(num_values == rhs.num_values)) - return false; - if (!(total_uncompressed_size == rhs.total_uncompressed_size)) - return false; - if (!(total_compressed_size == rhs.total_compressed_size)) - return false; - if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) - return false; - else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) - return false; - if (!(data_page_offset == rhs.data_page_offset)) - return false; - if (__isset.index_page_offset != rhs.__isset.index_page_offset) - return false; - else if (__isset.index_page_offset && !(index_page_offset == rhs.index_page_offset)) - return false; - if (__isset.dictionary_page_offset != 
rhs.__isset.dictionary_page_offset) - return false; - else if (__isset.dictionary_page_offset && !(dictionary_page_offset == rhs.dictionary_page_offset)) - return false; - if (__isset.statistics != rhs.__isset.statistics) - return false; - else if (__isset.statistics && !(statistics == rhs.statistics)) - return false; - if (__isset.encoding_stats != rhs.__isset.encoding_stats) - return false; - else if (__isset.encoding_stats && !(encoding_stats == rhs.encoding_stats)) - return false; - if (__isset.bloom_filter_offset != rhs.__isset.bloom_filter_offset) - return false; - else if (__isset.bloom_filter_offset && !(bloom_filter_offset == rhs.bloom_filter_offset)) - return false; - if (__isset.bloom_filter_length != rhs.__isset.bloom_filter_length) - return false; - else if (__isset.bloom_filter_length && !(bloom_filter_length == rhs.bloom_filter_length)) - return false; - if (__isset.size_statistics != rhs.__isset.size_statistics) - return false; - else if (__isset.size_statistics && !(size_statistics == rhs.size_statistics)) - return false; - return true; - } + void __set_geospatial_statistics(const GeospatialStatistics& val); + + bool operator == (const ColumnMetaData & rhs) const; bool operator != (const ColumnMetaData &rhs) const { return !(*this == rhs); } @@ -3044,15 +2851,11 @@ class EncryptionWithFooterKey { EncryptionWithFooterKey(EncryptionWithFooterKey&&) noexcept; EncryptionWithFooterKey& operator=(const EncryptionWithFooterKey&) noexcept; EncryptionWithFooterKey& operator=(EncryptionWithFooterKey&&) noexcept; - EncryptionWithFooterKey() noexcept { - } + EncryptionWithFooterKey() noexcept; virtual ~EncryptionWithFooterKey() noexcept; - bool operator == (const EncryptionWithFooterKey & /* rhs */) const - { - return true; - } + bool operator == (const EncryptionWithFooterKey & /* rhs */) const; bool operator != (const EncryptionWithFooterKey &rhs) const { return !(*this == rhs); } @@ -3083,9 +2886,7 @@ class EncryptionWithColumnKey { 
EncryptionWithColumnKey(EncryptionWithColumnKey&&) noexcept; EncryptionWithColumnKey& operator=(const EncryptionWithColumnKey&); EncryptionWithColumnKey& operator=(EncryptionWithColumnKey&&) noexcept; - EncryptionWithColumnKey() noexcept - : key_metadata() { - } + EncryptionWithColumnKey() noexcept; virtual ~EncryptionWithColumnKey() noexcept; /** @@ -3103,16 +2904,7 @@ class EncryptionWithColumnKey { void __set_key_metadata(const std::string& val); - bool operator == (const EncryptionWithColumnKey & rhs) const - { - if (!(path_in_schema == rhs.path_in_schema)) - return false; - if (__isset.key_metadata != rhs.__isset.key_metadata) - return false; - else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) - return false; - return true; - } + bool operator == (const EncryptionWithColumnKey & rhs) const; bool operator != (const EncryptionWithColumnKey &rhs) const { return !(*this == rhs); } @@ -3144,8 +2936,7 @@ class ColumnCryptoMetaData { ColumnCryptoMetaData(ColumnCryptoMetaData&&) noexcept; ColumnCryptoMetaData& operator=(const ColumnCryptoMetaData&); ColumnCryptoMetaData& operator=(ColumnCryptoMetaData&&) noexcept; - ColumnCryptoMetaData() noexcept { - } + ColumnCryptoMetaData() noexcept; virtual ~ColumnCryptoMetaData() noexcept; EncryptionWithFooterKey ENCRYPTION_WITH_FOOTER_KEY; @@ -3157,18 +2948,7 @@ class ColumnCryptoMetaData { void __set_ENCRYPTION_WITH_COLUMN_KEY(const EncryptionWithColumnKey& val); - bool operator == (const ColumnCryptoMetaData & rhs) const - { - if (__isset.ENCRYPTION_WITH_FOOTER_KEY != rhs.__isset.ENCRYPTION_WITH_FOOTER_KEY) - return false; - else if (__isset.ENCRYPTION_WITH_FOOTER_KEY && !(ENCRYPTION_WITH_FOOTER_KEY == rhs.ENCRYPTION_WITH_FOOTER_KEY)) - return false; - if (__isset.ENCRYPTION_WITH_COLUMN_KEY != rhs.__isset.ENCRYPTION_WITH_COLUMN_KEY) - return false; - else if (__isset.ENCRYPTION_WITH_COLUMN_KEY && !(ENCRYPTION_WITH_COLUMN_KEY == rhs.ENCRYPTION_WITH_COLUMN_KEY)) - return false; - return true; - } + bool 
operator == (const ColumnCryptoMetaData & rhs) const; bool operator != (const ColumnCryptoMetaData &rhs) const { return !(*this == rhs); } @@ -3206,32 +2986,32 @@ class ColumnChunk { ColumnChunk(ColumnChunk&&) noexcept; ColumnChunk& operator=(const ColumnChunk&); ColumnChunk& operator=(ColumnChunk&&) noexcept; - ColumnChunk() noexcept - : file_path(), - file_offset(0), - offset_index_offset(0), - offset_index_length(0), - column_index_offset(0), - column_index_length(0), - encrypted_column_metadata() { - } + ColumnChunk() noexcept; virtual ~ColumnChunk() noexcept; /** * File where column data is stored. If not set, assumed to be same file as * metadata. This path is relative to the current file. - * + * */ std::string file_path; /** - * Byte offset in file_path to the ColumnMetaData * + * Deprecated: Byte offset in file_path to the ColumnMetaData + * + * Past use of this field has been inconsistent, with some implementations + * using it to point to the ColumnMetaData and some using it to point to + * the first page in the column chunk. In many cases, the ColumnMetaData at this + * location is wrong. This field is now deprecated and should not be used. + * Writers should set this field to 0 if no ColumnMetaData has been written outside + * the footer. */ int64_t file_offset; /** - * Column metadata for this chunk. This is the same content as what is at - * file_path/file_offset. Having it here has it replicated in the file - * metadata. - * + * Column metadata for this chunk. Some writers may also replicate this at the + * location pointed to by file_path/file_offset. + * Note: while marked as optional, this field is in fact required by most major + * Parquet implementations. As such, writers MUST populate this field. 
+ * */ ColumnMetaData meta_data; /** @@ -3279,44 +3059,7 @@ class ColumnChunk { void __set_encrypted_column_metadata(const std::string& val); - bool operator == (const ColumnChunk & rhs) const - { - if (__isset.file_path != rhs.__isset.file_path) - return false; - else if (__isset.file_path && !(file_path == rhs.file_path)) - return false; - if (!(file_offset == rhs.file_offset)) - return false; - if (__isset.meta_data != rhs.__isset.meta_data) - return false; - else if (__isset.meta_data && !(meta_data == rhs.meta_data)) - return false; - if (__isset.offset_index_offset != rhs.__isset.offset_index_offset) - return false; - else if (__isset.offset_index_offset && !(offset_index_offset == rhs.offset_index_offset)) - return false; - if (__isset.offset_index_length != rhs.__isset.offset_index_length) - return false; - else if (__isset.offset_index_length && !(offset_index_length == rhs.offset_index_length)) - return false; - if (__isset.column_index_offset != rhs.__isset.column_index_offset) - return false; - else if (__isset.column_index_offset && !(column_index_offset == rhs.column_index_offset)) - return false; - if (__isset.column_index_length != rhs.__isset.column_index_length) - return false; - else if (__isset.column_index_length && !(column_index_length == rhs.column_index_length)) - return false; - if (__isset.crypto_metadata != rhs.__isset.crypto_metadata) - return false; - else if (__isset.crypto_metadata && !(crypto_metadata == rhs.crypto_metadata)) - return false; - if (__isset.encrypted_column_metadata != rhs.__isset.encrypted_column_metadata) - return false; - else if (__isset.encrypted_column_metadata && !(encrypted_column_metadata == rhs.encrypted_column_metadata)) - return false; - return true; - } + bool operator == (const ColumnChunk & rhs) const; bool operator != (const ColumnChunk &rhs) const { return !(*this == rhs); } @@ -3350,19 +3093,13 @@ class RowGroup { RowGroup(RowGroup&&) noexcept; RowGroup& operator=(const RowGroup&); RowGroup& 
operator=(RowGroup&&) noexcept; - RowGroup() noexcept - : total_byte_size(0), - num_rows(0), - file_offset(0), - total_compressed_size(0), - ordinal(0) { - } + RowGroup() noexcept; virtual ~RowGroup() noexcept; /** * Metadata for each column chunk in this row group. * This list must have the same order as the SchemaElement list in FileMetaData. - * + * */ std::vector columns; /** @@ -3409,32 +3146,7 @@ class RowGroup { void __set_ordinal(const int16_t val); - bool operator == (const RowGroup & rhs) const - { - if (!(columns == rhs.columns)) - return false; - if (!(total_byte_size == rhs.total_byte_size)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (__isset.sorting_columns != rhs.__isset.sorting_columns) - return false; - else if (__isset.sorting_columns && !(sorting_columns == rhs.sorting_columns)) - return false; - if (__isset.file_offset != rhs.__isset.file_offset) - return false; - else if (__isset.file_offset && !(file_offset == rhs.file_offset)) - return false; - if (__isset.total_compressed_size != rhs.__isset.total_compressed_size) - return false; - else if (__isset.total_compressed_size && !(total_compressed_size == rhs.total_compressed_size)) - return false; - if (__isset.ordinal != rhs.__isset.ordinal) - return false; - else if (__isset.ordinal && !(ordinal == rhs.ordinal)) - return false; - return true; - } + bool operator == (const RowGroup & rhs) const; bool operator != (const RowGroup &rhs) const { return !(*this == rhs); } @@ -3464,15 +3176,11 @@ class TypeDefinedOrder { TypeDefinedOrder(TypeDefinedOrder&&) noexcept; TypeDefinedOrder& operator=(const TypeDefinedOrder&) noexcept; TypeDefinedOrder& operator=(TypeDefinedOrder&&) noexcept; - TypeDefinedOrder() noexcept { - } + TypeDefinedOrder() noexcept; virtual ~TypeDefinedOrder() noexcept; - bool operator == (const TypeDefinedOrder & /* rhs */) const - { - return true; - } + bool operator == (const TypeDefinedOrder & /* rhs */) const; bool operator != (const 
TypeDefinedOrder &rhs) const { return !(*this == rhs); } @@ -3500,11 +3208,11 @@ typedef struct _ColumnOrder__isset { * Union to specify the order used for the min_value and max_value fields for a * column. This union takes the role of an enhanced enum that allows rich * elements (which will be needed for a collation-based ordering in the future). - * + * * Possible values are: * * TypeDefinedOrder - the column uses the order defined by its logical or * physical type (if there is no logical type). - * + * * If the reader does not support the value of this union, min and max stats * for this column should be ignored. */ @@ -3515,8 +3223,7 @@ class ColumnOrder { ColumnOrder(ColumnOrder&&) noexcept; ColumnOrder& operator=(const ColumnOrder&) noexcept; ColumnOrder& operator=(ColumnOrder&&) noexcept; - ColumnOrder() noexcept { - } + ColumnOrder() noexcept; virtual ~ColumnOrder() noexcept; /** @@ -3536,13 +3243,16 @@ class ColumnOrder { * TIME_MICROS - signed comparison * TIMESTAMP_MILLIS - signed comparison * TIMESTAMP_MICROS - signed comparison - * INTERVAL - unsigned comparison + * INTERVAL - undefined * JSON - unsigned byte-wise comparison * BSON - unsigned byte-wise comparison * ENUM - unsigned byte-wise comparison * LIST - undefined * MAP - undefined - * + * VARIANT - undefined + * GEOMETRY - undefined + * GEOGRAPHY - undefined + * * In the absence of logical types, the sort order is determined by the physical type: * BOOLEAN - false, true * INT32 - signed comparison @@ -3552,7 +3262,7 @@ class ColumnOrder { * DOUBLE - signed comparison of the represented value (*) * BYTE_ARRAY - unsigned byte-wise comparison * FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison - * + * * (*) Because the sorting order is not specified properly for floating * point values (relations vs. 
total ordering) the following * compatibility rules should be applied when reading statistics: @@ -3561,7 +3271,7 @@ class ColumnOrder { * - If the min is +0, the row group may contain -0 values as well. * - If the max is -0, the row group may contain +0 values as well. * - When looking for NaN values, min and max should be ignored. - * + * * When writing statistics the following rules should be followed: * - NaNs should not be written to min or max statistics fields. * - If the computed max value is zero (whether negative or positive), @@ -3575,14 +3285,7 @@ class ColumnOrder { void __set_TYPE_ORDER(const TypeDefinedOrder& val); - bool operator == (const ColumnOrder & rhs) const - { - if (__isset.TYPE_ORDER != rhs.__isset.TYPE_ORDER) - return false; - else if (__isset.TYPE_ORDER && !(TYPE_ORDER == rhs.TYPE_ORDER)) - return false; - return true; - } + bool operator == (const ColumnOrder & rhs) const; bool operator != (const ColumnOrder &rhs) const { return !(*this == rhs); } @@ -3609,11 +3312,7 @@ class PageLocation { PageLocation(PageLocation&&) noexcept; PageLocation& operator=(const PageLocation&) noexcept; PageLocation& operator=(PageLocation&&) noexcept; - PageLocation() noexcept - : offset(0), - compressed_page_size(0), - first_row_index(0) { - } + PageLocation() noexcept; virtual ~PageLocation() noexcept; /** @@ -3626,8 +3325,9 @@ class PageLocation { */ int32_t compressed_page_size; /** - * Index within the RowGroup of the first row of the page; this means pages - * change on record boundaries (r = 0). + * Index within the RowGroup of the first row of the page. When an + * OffsetIndex is present, pages must begin on row boundaries + * (repetition_level = 0). 
*/ int64_t first_row_index; @@ -3637,16 +3337,7 @@ class PageLocation { void __set_first_row_index(const int64_t val); - bool operator == (const PageLocation & rhs) const - { - if (!(offset == rhs.offset)) - return false; - if (!(compressed_page_size == rhs.compressed_page_size)) - return false; - if (!(first_row_index == rhs.first_row_index)) - return false; - return true; - } + bool operator == (const PageLocation & rhs) const; bool operator != (const PageLocation &rhs) const { return !(*this == rhs); } @@ -3670,6 +3361,13 @@ typedef struct _OffsetIndex__isset { bool unencoded_byte_array_data_bytes :1; } _OffsetIndex__isset; +/** + * Optional offsets for each data page in a ColumnChunk. + * + * Forms part of the page index, along with ColumnIndex. + * + * OffsetIndex may be present even if ColumnIndex is not. + */ class OffsetIndex { public: @@ -3677,8 +3375,7 @@ class OffsetIndex { OffsetIndex(OffsetIndex&&) noexcept; OffsetIndex& operator=(const OffsetIndex&); OffsetIndex& operator=(OffsetIndex&&) noexcept; - OffsetIndex() noexcept { - } + OffsetIndex() noexcept; virtual ~OffsetIndex() noexcept; /** @@ -3688,7 +3385,7 @@ class OffsetIndex { std::vector page_locations; /** * Unencoded/uncompressed size for BYTE_ARRAY types. - * + * * See documention for unencoded_byte_array_data_bytes in SizeStatistics for * more details on this field. 
*/ @@ -3700,16 +3397,7 @@ class OffsetIndex { void __set_unencoded_byte_array_data_bytes(const std::vector & val); - bool operator == (const OffsetIndex & rhs) const - { - if (!(page_locations == rhs.page_locations)) - return false; - if (__isset.unencoded_byte_array_data_bytes != rhs.__isset.unencoded_byte_array_data_bytes) - return false; - else if (__isset.unencoded_byte_array_data_bytes && !(unencoded_byte_array_data_bytes == rhs.unencoded_byte_array_data_bytes)) - return false; - return true; - } + bool operator == (const OffsetIndex & rhs) const; bool operator != (const OffsetIndex &rhs) const { return !(*this == rhs); } @@ -3736,8 +3424,14 @@ typedef struct _ColumnIndex__isset { } _ColumnIndex__isset; /** - * Description for ColumnIndex. - * Each [i] refers to the page at OffsetIndex.page_locations[i] + * Optional statistics for each data page in a ColumnChunk. + * + * Forms part the page index, along with OffsetIndex. + * + * If this structure is present, OffsetIndex must also be present. + * + * For each field in this structure, [i] refers to the page at + * OffsetIndex.page_locations[i] */ class ColumnIndex { public: @@ -3746,9 +3440,7 @@ class ColumnIndex { ColumnIndex(ColumnIndex&&) noexcept; ColumnIndex& operator=(const ColumnIndex&); ColumnIndex& operator=(ColumnIndex&&) noexcept; - ColumnIndex() noexcept - : boundary_order(static_cast(0)) { - } + ColumnIndex() noexcept; virtual ~ColumnIndex() noexcept; /** @@ -3776,31 +3468,38 @@ class ColumnIndex { * which direction. This allows readers to perform binary searches in both * lists. Readers cannot assume that max_values[i] <= min_values[i+1], even * if the lists are ordered. - * + * * @see BoundaryOrder */ BoundaryOrder::type boundary_order; /** - * A list containing the number of null values for each page * + * A list containing the number of null values for each page + * + * Writers SHOULD always write this field even if no null values + * are present or the column is not nullable. 
+ * Readers MUST distinguish between null_counts not being present + * and null_count being 0. + * If null_counts are not present, readers MUST NOT assume all + * null counts are 0. */ std::vector null_counts; /** * Contains repetition level histograms for each page * concatenated together. The repetition_level_histogram field on * SizeStatistics contains more details. - * + * * When present the length should always be (number of pages * * (max_repetition_level + 1)) elements. - * + * * Element 0 is the first element of the histogram for the first page. * Element (max_repetition_level + 1) is the first element of the histogram * for the second page. - * + * */ std::vector repetition_level_histograms; /** * Same as repetition_level_histograms except for definitions levels. - * + * */ std::vector definition_level_histograms; @@ -3820,30 +3519,7 @@ class ColumnIndex { void __set_definition_level_histograms(const std::vector & val); - bool operator == (const ColumnIndex & rhs) const - { - if (!(null_pages == rhs.null_pages)) - return false; - if (!(min_values == rhs.min_values)) - return false; - if (!(max_values == rhs.max_values)) - return false; - if (!(boundary_order == rhs.boundary_order)) - return false; - if (__isset.null_counts != rhs.__isset.null_counts) - return false; - else if (__isset.null_counts && !(null_counts == rhs.null_counts)) - return false; - if (__isset.repetition_level_histograms != rhs.__isset.repetition_level_histograms) - return false; - else if (__isset.repetition_level_histograms && !(repetition_level_histograms == rhs.repetition_level_histograms)) - return false; - if (__isset.definition_level_histograms != rhs.__isset.definition_level_histograms) - return false; - else if (__isset.definition_level_histograms && !(definition_level_histograms == rhs.definition_level_histograms)) - return false; - return true; - } + bool operator == (const ColumnIndex & rhs) const; bool operator != (const ColumnIndex &rhs) const { return !(*this == rhs); } 
@@ -3876,11 +3552,7 @@ class AesGcmV1 { AesGcmV1(AesGcmV1&&) noexcept; AesGcmV1& operator=(const AesGcmV1&); AesGcmV1& operator=(AesGcmV1&&) noexcept; - AesGcmV1() noexcept - : aad_prefix(), - aad_file_unique(), - supply_aad_prefix(0) { - } + AesGcmV1() noexcept; virtual ~AesGcmV1() noexcept; /** @@ -3905,22 +3577,7 @@ class AesGcmV1 { void __set_supply_aad_prefix(const bool val); - bool operator == (const AesGcmV1 & rhs) const - { - if (__isset.aad_prefix != rhs.__isset.aad_prefix) - return false; - else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) - return false; - if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) - return false; - else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) - return false; - if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) - return false; - else if (__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix)) - return false; - return true; - } + bool operator == (const AesGcmV1 & rhs) const; bool operator != (const AesGcmV1 &rhs) const { return !(*this == rhs); } @@ -3953,11 +3610,7 @@ class AesGcmCtrV1 { AesGcmCtrV1(AesGcmCtrV1&&) noexcept; AesGcmCtrV1& operator=(const AesGcmCtrV1&); AesGcmCtrV1& operator=(AesGcmCtrV1&&) noexcept; - AesGcmCtrV1() noexcept - : aad_prefix(), - aad_file_unique(), - supply_aad_prefix(0) { - } + AesGcmCtrV1() noexcept; virtual ~AesGcmCtrV1() noexcept; /** @@ -3982,22 +3635,7 @@ class AesGcmCtrV1 { void __set_supply_aad_prefix(const bool val); - bool operator == (const AesGcmCtrV1 & rhs) const - { - if (__isset.aad_prefix != rhs.__isset.aad_prefix) - return false; - else if (__isset.aad_prefix && !(aad_prefix == rhs.aad_prefix)) - return false; - if (__isset.aad_file_unique != rhs.__isset.aad_file_unique) - return false; - else if (__isset.aad_file_unique && !(aad_file_unique == rhs.aad_file_unique)) - return false; - if (__isset.supply_aad_prefix != rhs.__isset.supply_aad_prefix) - return false; - else if 
(__isset.supply_aad_prefix && !(supply_aad_prefix == rhs.supply_aad_prefix)) - return false; - return true; - } + bool operator == (const AesGcmCtrV1 & rhs) const; bool operator != (const AesGcmCtrV1 &rhs) const { return !(*this == rhs); } @@ -4029,8 +3667,7 @@ class EncryptionAlgorithm { EncryptionAlgorithm(EncryptionAlgorithm&&) noexcept; EncryptionAlgorithm& operator=(const EncryptionAlgorithm&); EncryptionAlgorithm& operator=(EncryptionAlgorithm&&) noexcept; - EncryptionAlgorithm() noexcept { - } + EncryptionAlgorithm() noexcept; virtual ~EncryptionAlgorithm() noexcept; AesGcmV1 AES_GCM_V1; @@ -4042,18 +3679,7 @@ class EncryptionAlgorithm { void __set_AES_GCM_CTR_V1(const AesGcmCtrV1& val); - bool operator == (const EncryptionAlgorithm & rhs) const - { - if (__isset.AES_GCM_V1 != rhs.__isset.AES_GCM_V1) - return false; - else if (__isset.AES_GCM_V1 && !(AES_GCM_V1 == rhs.AES_GCM_V1)) - return false; - if (__isset.AES_GCM_CTR_V1 != rhs.__isset.AES_GCM_CTR_V1) - return false; - else if (__isset.AES_GCM_CTR_V1 && !(AES_GCM_CTR_V1 == rhs.AES_GCM_CTR_V1)) - return false; - return true; - } + bool operator == (const EncryptionAlgorithm & rhs) const; bool operator != (const EncryptionAlgorithm &rhs) const { return !(*this == rhs); } @@ -4091,12 +3717,7 @@ class FileMetaData { FileMetaData(FileMetaData&&) noexcept; FileMetaData& operator=(const FileMetaData&); FileMetaData& operator=(FileMetaData&&) noexcept; - FileMetaData() noexcept - : version(0), - num_rows(0), - created_by(), - footer_signing_key_metadata() { - } + FileMetaData() noexcept; virtual ~FileMetaData() noexcept; /** @@ -4128,7 +3749,7 @@ class FileMetaData { * String for application that wrote this file. This should be in the format * version (build ). * e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) - * + * */ std::string created_by; /** @@ -4138,12 +3759,12 @@ class FileMetaData { * matching the columns in the schema. 
The indexes are not necessary the same * though, because only leaf nodes of the schema are represented in the list * of sort orders. - * + * * Without column_orders, the meaning of the min_value and max_value fields * in the Statistics object and the ColumnIndex object is undefined. To ensure * well-defined behaviour, if these fields are written to a Parquet file, * column_orders must be written as well. - * + * * The obsolete min and max fields in the Statistics object are always sorted * by signed comparison regardless of column_orders. */ @@ -4180,38 +3801,7 @@ class FileMetaData { void __set_footer_signing_key_metadata(const std::string& val); - bool operator == (const FileMetaData & rhs) const - { - if (!(version == rhs.version)) - return false; - if (!(schema == rhs.schema)) - return false; - if (!(num_rows == rhs.num_rows)) - return false; - if (!(row_groups == rhs.row_groups)) - return false; - if (__isset.key_value_metadata != rhs.__isset.key_value_metadata) - return false; - else if (__isset.key_value_metadata && !(key_value_metadata == rhs.key_value_metadata)) - return false; - if (__isset.created_by != rhs.__isset.created_by) - return false; - else if (__isset.created_by && !(created_by == rhs.created_by)) - return false; - if (__isset.column_orders != rhs.__isset.column_orders) - return false; - else if (__isset.column_orders && !(column_orders == rhs.column_orders)) - return false; - if (__isset.encryption_algorithm != rhs.__isset.encryption_algorithm) - return false; - else if (__isset.encryption_algorithm && !(encryption_algorithm == rhs.encryption_algorithm)) - return false; - if (__isset.footer_signing_key_metadata != rhs.__isset.footer_signing_key_metadata) - return false; - else if (__isset.footer_signing_key_metadata && !(footer_signing_key_metadata == rhs.footer_signing_key_metadata)) - return false; - return true; - } + bool operator == (const FileMetaData & rhs) const; bool operator != (const FileMetaData &rhs) const { return !(*this == 
rhs); } @@ -4245,9 +3835,7 @@ class FileCryptoMetaData { FileCryptoMetaData(FileCryptoMetaData&&) noexcept; FileCryptoMetaData& operator=(const FileCryptoMetaData&); FileCryptoMetaData& operator=(FileCryptoMetaData&&) noexcept; - FileCryptoMetaData() noexcept - : key_metadata() { - } + FileCryptoMetaData() noexcept; virtual ~FileCryptoMetaData() noexcept; /** @@ -4268,16 +3856,7 @@ class FileCryptoMetaData { void __set_key_metadata(const std::string& val); - bool operator == (const FileCryptoMetaData & rhs) const - { - if (!(encryption_algorithm == rhs.encryption_algorithm)) - return false; - if (__isset.key_metadata != rhs.__isset.key_metadata) - return false; - else if (__isset.key_metadata && !(key_metadata == rhs.key_metadata)) - return false; - return true; - } + bool operator == (const FileCryptoMetaData & rhs) const; bool operator != (const FileCryptoMetaData &rhs) const { return !(*this == rhs); } diff --git a/cpp/src/generated/parquet_types.tcc b/cpp/src/generated/parquet_types.tcc index ee02d7f0139fc..e47e2220ebbd9 100644 --- a/cpp/src/generated/parquet_types.tcc +++ b/cpp/src/generated/parquet_types.tcc @@ -1,5 +1,5 @@ /** - * Autogenerated by Thrift Compiler (0.19.0) + * Autogenerated by Thrift Compiler (0.21.0) * * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING * @generated @@ -135,6 +135,254 @@ uint32_t SizeStatistics::write(Protocol_* oprot) const { return xfer; } +template +uint32_t BoundingBox::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_xmin = false; + bool isset_xmax = false; + bool isset_ymin = false; + bool isset_ymax = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } 
+ switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->xmin); + isset_xmin = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->xmax); + isset_xmax = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->ymin); + isset_ymin = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->ymax); + isset_ymax = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->zmin); + this->__isset.zmin = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->zmax); + this->__isset.zmax = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->mmin); + this->__isset.mmin = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_DOUBLE) { + xfer += iprot->readDouble(this->mmax); + this->__isset.mmax = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_xmin) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_xmax) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_ymin) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_ymax) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t 
BoundingBox::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BoundingBox"); + + xfer += oprot->writeFieldBegin("xmin", ::apache::thrift::protocol::T_DOUBLE, 1); + xfer += oprot->writeDouble(this->xmin); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("xmax", ::apache::thrift::protocol::T_DOUBLE, 2); + xfer += oprot->writeDouble(this->xmax); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("ymin", ::apache::thrift::protocol::T_DOUBLE, 3); + xfer += oprot->writeDouble(this->ymin); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("ymax", ::apache::thrift::protocol::T_DOUBLE, 4); + xfer += oprot->writeDouble(this->ymax); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.zmin) { + xfer += oprot->writeFieldBegin("zmin", ::apache::thrift::protocol::T_DOUBLE, 5); + xfer += oprot->writeDouble(this->zmin); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.zmax) { + xfer += oprot->writeFieldBegin("zmax", ::apache::thrift::protocol::T_DOUBLE, 6); + xfer += oprot->writeDouble(this->zmax); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.mmin) { + xfer += oprot->writeFieldBegin("mmin", ::apache::thrift::protocol::T_DOUBLE, 7); + xfer += oprot->writeDouble(this->mmin); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.mmax) { + xfer += oprot->writeFieldBegin("mmax", ::apache::thrift::protocol::T_DOUBLE, 8); + xfer += oprot->writeDouble(this->mmax); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t GeospatialStatistics::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using 
::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->bbox.read(iprot); + this->__isset.bbox = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->geospatial_types.clear(); + uint32_t _size20; + ::apache::thrift::protocol::TType _etype23; + xfer += iprot->readListBegin(_etype23, _size20); + this->geospatial_types.resize(_size20); + uint32_t _i24; + for (_i24 = 0; _i24 < _size20; ++_i24) + { + xfer += iprot->readI32(this->geospatial_types[_i24]); + } + xfer += iprot->readListEnd(); + } + this->__isset.geospatial_types = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t GeospatialStatistics::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("GeospatialStatistics"); + + if (this->__isset.bbox) { + xfer += oprot->writeFieldBegin("bbox", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->bbox.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.geospatial_types) { + xfer += oprot->writeFieldBegin("geospatial_types", ::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->geospatial_types.size())); + std::vector ::const_iterator _iter25; + for (_iter25 = this->geospatial_types.begin(); _iter25 != this->geospatial_types.end(); ++_iter25) + { + xfer += oprot->writeI32((*_iter25)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += 
oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + template uint32_t Statistics::read(Protocol_* iprot) { @@ -1188,6 +1436,177 @@ uint32_t BsonType::write(Protocol_* oprot) const { return xfer; } +template +uint32_t VariantType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t VariantType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("VariantType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t GeometryType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->crs); + this->__isset.crs = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t GeometryType::write(Protocol_* 
oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("GeometryType"); + + if (this->__isset.crs) { + xfer += oprot->writeFieldBegin("crs", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeString(this->crs); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t GeographyType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->crs); + this->__isset.crs = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast114; + xfer += iprot->readI32(ecast114); + this->algorithm = static_cast(ecast114); + this->__isset.algorithm = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t GeographyType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("GeographyType"); + + if (this->__isset.crs) { + xfer += oprot->writeFieldBegin("crs", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeString(this->crs); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.algorithm) { + xfer += oprot->writeFieldBegin("algorithm", 
::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(static_cast(this->algorithm)); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + template uint32_t LogicalType::read(Protocol_* iprot) { @@ -1322,6 +1741,30 @@ uint32_t LogicalType::read(Protocol_* iprot) { xfer += iprot->skip(ftype); } break; + case 16: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->VARIANT.read(iprot); + this->__isset.VARIANT = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 17: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->GEOMETRY.read(iprot); + this->__isset.GEOMETRY = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 18: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->GEOGRAPHY.read(iprot); + this->__isset.GEOGRAPHY = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -1410,6 +1853,21 @@ uint32_t LogicalType::write(Protocol_* oprot) const { xfer += this->FLOAT16.write(oprot); xfer += oprot->writeFieldEnd(); } + if (this->__isset.VARIANT) { + xfer += oprot->writeFieldBegin("VARIANT", ::apache::thrift::protocol::T_STRUCT, 16); + xfer += this->VARIANT.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.GEOMETRY) { + xfer += oprot->writeFieldBegin("GEOMETRY", ::apache::thrift::protocol::T_STRUCT, 17); + xfer += this->GEOMETRY.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.GEOGRAPHY) { + xfer += oprot->writeFieldBegin("GEOGRAPHY", ::apache::thrift::protocol::T_STRUCT, 18); + xfer += this->GEOGRAPHY.write(oprot); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -1440,9 +1898,9 @@ uint32_t SchemaElement::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast96; - xfer += 
iprot->readI32(ecast96); - this->type = static_cast(ecast96); + int32_t ecast123; + xfer += iprot->readI32(ecast123); + this->type = static_cast(ecast123); this->__isset.type = true; } else { xfer += iprot->skip(ftype); @@ -1458,9 +1916,9 @@ uint32_t SchemaElement::read(Protocol_* iprot) { break; case 3: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast97; - xfer += iprot->readI32(ecast97); - this->repetition_type = static_cast(ecast97); + int32_t ecast124; + xfer += iprot->readI32(ecast124); + this->repetition_type = static_cast(ecast124); this->__isset.repetition_type = true; } else { xfer += iprot->skip(ftype); @@ -1484,9 +1942,9 @@ uint32_t SchemaElement::read(Protocol_* iprot) { break; case 6: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast98; - xfer += iprot->readI32(ecast98); - this->converted_type = static_cast(ecast98); + int32_t ecast125; + xfer += iprot->readI32(ecast125); + this->converted_type = static_cast(ecast125); this->__isset.converted_type = true; } else { xfer += iprot->skip(ftype); @@ -1634,9 +2092,9 @@ uint32_t DataPageHeader::read(Protocol_* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast103; - xfer += iprot->readI32(ecast103); - this->encoding = static_cast(ecast103); + int32_t ecast130; + xfer += iprot->readI32(ecast130); + this->encoding = static_cast(ecast130); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -1644,9 +2102,9 @@ uint32_t DataPageHeader::read(Protocol_* iprot) { break; case 3: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast104; - xfer += iprot->readI32(ecast104); - this->definition_level_encoding = static_cast(ecast104); + int32_t ecast131; + xfer += iprot->readI32(ecast131); + this->definition_level_encoding = static_cast(ecast131); isset_definition_level_encoding = true; } else { xfer += iprot->skip(ftype); @@ -1654,9 +2112,9 @@ uint32_t DataPageHeader::read(Protocol_* iprot) { break; case 4: if (ftype == 
::apache::thrift::protocol::T_I32) { - int32_t ecast105; - xfer += iprot->readI32(ecast105); - this->repetition_level_encoding = static_cast(ecast105); + int32_t ecast132; + xfer += iprot->readI32(ecast132); + this->repetition_level_encoding = static_cast(ecast132); isset_repetition_level_encoding = true; } else { xfer += iprot->skip(ftype); @@ -1796,9 +2254,9 @@ uint32_t DictionaryPageHeader::read(Protocol_* iprot) { break; case 2: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast114; - xfer += iprot->readI32(ecast114); - this->encoding = static_cast(ecast114); + int32_t ecast141; + xfer += iprot->readI32(ecast141); + this->encoding = static_cast(ecast141); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -1906,9 +2364,9 @@ uint32_t DataPageHeaderV2::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast119; - xfer += iprot->readI32(ecast119); - this->encoding = static_cast(ecast119); + int32_t ecast146; + xfer += iprot->readI32(ecast146); + this->encoding = static_cast(ecast146); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2441,9 +2899,9 @@ uint32_t PageHeader::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast152; - xfer += iprot->readI32(ecast152); - this->type = static_cast(ecast152); + int32_t ecast179; + xfer += iprot->readI32(ecast179); + this->type = static_cast(ecast179); isset_type = true; } else { xfer += iprot->skip(ftype); @@ -2761,9 +3219,9 @@ uint32_t PageEncodingStats::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast165; - xfer += iprot->readI32(ecast165); - this->page_type = static_cast(ecast165); + int32_t ecast192; + xfer += iprot->readI32(ecast192); + this->page_type = static_cast(ecast192); isset_page_type = true; } else { xfer += iprot->skip(ftype); @@ -2771,9 +3229,9 @@ uint32_t PageEncodingStats::read(Protocol_* iprot) { break; case 2: if 
(ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast166; - xfer += iprot->readI32(ecast166); - this->encoding = static_cast(ecast166); + int32_t ecast193; + xfer += iprot->readI32(ecast193); + this->encoding = static_cast(ecast193); isset_encoding = true; } else { xfer += iprot->skip(ftype); @@ -2860,9 +3318,9 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { { case 1: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast171; - xfer += iprot->readI32(ecast171); - this->type = static_cast(ecast171); + int32_t ecast198; + xfer += iprot->readI32(ecast198); + this->type = static_cast(ecast198); isset_type = true; } else { xfer += iprot->skip(ftype); @@ -2872,16 +3330,16 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->encodings.clear(); - uint32_t _size172; - ::apache::thrift::protocol::TType _etype175; - xfer += iprot->readListBegin(_etype175, _size172); - this->encodings.resize(_size172); - uint32_t _i176; - for (_i176 = 0; _i176 < _size172; ++_i176) + uint32_t _size199; + ::apache::thrift::protocol::TType _etype202; + xfer += iprot->readListBegin(_etype202, _size199); + this->encodings.resize(_size199); + uint32_t _i203; + for (_i203 = 0; _i203 < _size199; ++_i203) { - int32_t ecast177; - xfer += iprot->readI32(ecast177); - this->encodings[_i176] = static_cast(ecast177); + int32_t ecast204; + xfer += iprot->readI32(ecast204); + this->encodings[_i203] = static_cast(ecast204); } xfer += iprot->readListEnd(); } @@ -2894,14 +3352,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->path_in_schema.clear(); - uint32_t _size178; - ::apache::thrift::protocol::TType _etype181; - xfer += iprot->readListBegin(_etype181, _size178); - this->path_in_schema.resize(_size178); - uint32_t _i182; - for (_i182 = 0; _i182 < _size178; ++_i182) + uint32_t _size205; + ::apache::thrift::protocol::TType _etype208; + xfer += 
iprot->readListBegin(_etype208, _size205); + this->path_in_schema.resize(_size205); + uint32_t _i209; + for (_i209 = 0; _i209 < _size205; ++_i209) { - xfer += iprot->readString(this->path_in_schema[_i182]); + xfer += iprot->readString(this->path_in_schema[_i209]); } xfer += iprot->readListEnd(); } @@ -2912,9 +3370,9 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast183; - xfer += iprot->readI32(ecast183); - this->codec = static_cast(ecast183); + int32_t ecast210; + xfer += iprot->readI32(ecast210); + this->codec = static_cast(ecast210); isset_codec = true; } else { xfer += iprot->skip(ftype); @@ -2948,14 +3406,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size184; - ::apache::thrift::protocol::TType _etype187; - xfer += iprot->readListBegin(_etype187, _size184); - this->key_value_metadata.resize(_size184); - uint32_t _i188; - for (_i188 = 0; _i188 < _size184; ++_i188) + uint32_t _size211; + ::apache::thrift::protocol::TType _etype214; + xfer += iprot->readListBegin(_etype214, _size211); + this->key_value_metadata.resize(_size211); + uint32_t _i215; + for (_i215 = 0; _i215 < _size211; ++_i215) { - xfer += this->key_value_metadata[_i188].read(iprot); + xfer += this->key_value_metadata[_i215].read(iprot); } xfer += iprot->readListEnd(); } @@ -3000,14 +3458,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->encoding_stats.clear(); - uint32_t _size189; - ::apache::thrift::protocol::TType _etype192; - xfer += iprot->readListBegin(_etype192, _size189); - this->encoding_stats.resize(_size189); - uint32_t _i193; - for (_i193 = 0; _i193 < _size189; ++_i193) + uint32_t _size216; + ::apache::thrift::protocol::TType _etype219; + xfer += iprot->readListBegin(_etype219, _size216); + this->encoding_stats.resize(_size216); + 
uint32_t _i220; + for (_i220 = 0; _i220 < _size216; ++_i220) { - xfer += this->encoding_stats[_i193].read(iprot); + xfer += this->encoding_stats[_i220].read(iprot); } xfer += iprot->readListEnd(); } @@ -3040,6 +3498,14 @@ uint32_t ColumnMetaData::read(Protocol_* iprot) { xfer += iprot->skip(ftype); } break; + case 17: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->geospatial_statistics.read(iprot); + this->__isset.geospatial_statistics = true; + } else { + xfer += iprot->skip(ftype); + } + break; default: xfer += iprot->skip(ftype); break; @@ -3081,10 +3547,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); - std::vector ::const_iterator _iter194; - for (_iter194 = this->encodings.begin(); _iter194 != this->encodings.end(); ++_iter194) + std::vector ::const_iterator _iter221; + for (_iter221 = this->encodings.begin(); _iter221 != this->encodings.end(); ++_iter221) { - xfer += oprot->writeI32(static_cast((*_iter194))); + xfer += oprot->writeI32(static_cast((*_iter221))); } xfer += oprot->writeListEnd(); } @@ -3093,10 +3559,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter195; - for (_iter195 = this->path_in_schema.begin(); _iter195 != this->path_in_schema.end(); ++_iter195) + std::vector ::const_iterator _iter222; + for (_iter222 = this->path_in_schema.begin(); _iter222 != this->path_in_schema.end(); ++_iter222) { - xfer += oprot->writeString((*_iter195)); + xfer += oprot->writeString((*_iter222)); } xfer += oprot->writeListEnd(); } @@ -3122,10 +3588,10 @@ uint32_t ColumnMetaData::write(Protocol_* 
oprot) const { xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter196; - for (_iter196 = this->key_value_metadata.begin(); _iter196 != this->key_value_metadata.end(); ++_iter196) + std::vector ::const_iterator _iter223; + for (_iter223 = this->key_value_metadata.begin(); _iter223 != this->key_value_metadata.end(); ++_iter223) { - xfer += (*_iter196).write(oprot); + xfer += (*_iter223).write(oprot); } xfer += oprot->writeListEnd(); } @@ -3154,10 +3620,10 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); - std::vector ::const_iterator _iter197; - for (_iter197 = this->encoding_stats.begin(); _iter197 != this->encoding_stats.end(); ++_iter197) + std::vector ::const_iterator _iter224; + for (_iter224 = this->encoding_stats.begin(); _iter224 != this->encoding_stats.end(); ++_iter224) { - xfer += (*_iter197).write(oprot); + xfer += (*_iter224).write(oprot); } xfer += oprot->writeListEnd(); } @@ -3178,6 +3644,11 @@ uint32_t ColumnMetaData::write(Protocol_* oprot) const { xfer += this->size_statistics.write(oprot); xfer += oprot->writeFieldEnd(); } + if (this->__isset.geospatial_statistics) { + xfer += oprot->writeFieldBegin("geospatial_statistics", ::apache::thrift::protocol::T_STRUCT, 17); + xfer += this->geospatial_statistics.write(oprot); + xfer += oprot->writeFieldEnd(); + } xfer += oprot->writeFieldStop(); xfer += oprot->writeStructEnd(); return xfer; @@ -3250,14 +3721,14 @@ uint32_t EncryptionWithColumnKey::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->path_in_schema.clear(); - uint32_t _size206; - 
::apache::thrift::protocol::TType _etype209; - xfer += iprot->readListBegin(_etype209, _size206); - this->path_in_schema.resize(_size206); - uint32_t _i210; - for (_i210 = 0; _i210 < _size206; ++_i210) + uint32_t _size233; + ::apache::thrift::protocol::TType _etype236; + xfer += iprot->readListBegin(_etype236, _size233); + this->path_in_schema.resize(_size233); + uint32_t _i237; + for (_i237 = 0; _i237 < _size233; ++_i237) { - xfer += iprot->readString(this->path_in_schema[_i210]); + xfer += iprot->readString(this->path_in_schema[_i237]); } xfer += iprot->readListEnd(); } @@ -3297,10 +3768,10 @@ uint32_t EncryptionWithColumnKey::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter211; - for (_iter211 = this->path_in_schema.begin(); _iter211 != this->path_in_schema.end(); ++_iter211) + std::vector ::const_iterator _iter238; + for (_iter238 = this->path_in_schema.begin(); _iter238 != this->path_in_schema.end(); ++_iter238) { - xfer += oprot->writeString((*_iter211)); + xfer += oprot->writeString((*_iter238)); } xfer += oprot->writeListEnd(); } @@ -3580,14 +4051,14 @@ uint32_t RowGroup::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->columns.clear(); - uint32_t _size224; - ::apache::thrift::protocol::TType _etype227; - xfer += iprot->readListBegin(_etype227, _size224); - this->columns.resize(_size224); - uint32_t _i228; - for (_i228 = 0; _i228 < _size224; ++_i228) + uint32_t _size251; + ::apache::thrift::protocol::TType _etype254; + xfer += iprot->readListBegin(_etype254, _size251); + this->columns.resize(_size251); + uint32_t _i255; + for (_i255 = 0; _i255 < _size251; ++_i255) { - xfer += this->columns[_i228].read(iprot); + xfer += this->columns[_i255].read(iprot); } xfer += iprot->readListEnd(); } @@ -3616,14 
+4087,14 @@ uint32_t RowGroup::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->sorting_columns.clear(); - uint32_t _size229; - ::apache::thrift::protocol::TType _etype232; - xfer += iprot->readListBegin(_etype232, _size229); - this->sorting_columns.resize(_size229); - uint32_t _i233; - for (_i233 = 0; _i233 < _size229; ++_i233) + uint32_t _size256; + ::apache::thrift::protocol::TType _etype259; + xfer += iprot->readListBegin(_etype259, _size256); + this->sorting_columns.resize(_size256); + uint32_t _i260; + for (_i260 = 0; _i260 < _size256; ++_i260) { - xfer += this->sorting_columns[_i233].read(iprot); + xfer += this->sorting_columns[_i260].read(iprot); } xfer += iprot->readListEnd(); } @@ -3683,10 +4154,10 @@ uint32_t RowGroup::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); - std::vector ::const_iterator _iter234; - for (_iter234 = this->columns.begin(); _iter234 != this->columns.end(); ++_iter234) + std::vector ::const_iterator _iter261; + for (_iter261 = this->columns.begin(); _iter261 != this->columns.end(); ++_iter261) { - xfer += (*_iter234).write(oprot); + xfer += (*_iter261).write(oprot); } xfer += oprot->writeListEnd(); } @@ -3704,10 +4175,10 @@ uint32_t RowGroup::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); - std::vector ::const_iterator _iter235; - for (_iter235 = this->sorting_columns.begin(); _iter235 != this->sorting_columns.end(); ++_iter235) + std::vector ::const_iterator _iter262; + for (_iter262 = this->sorting_columns.begin(); _iter262 != this->sorting_columns.end(); ++_iter262) { - xfer += (*_iter235).write(oprot); + xfer += (*_iter262).write(oprot); } 
xfer += oprot->writeListEnd(); } @@ -3948,14 +4419,14 @@ uint32_t OffsetIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->page_locations.clear(); - uint32_t _size252; - ::apache::thrift::protocol::TType _etype255; - xfer += iprot->readListBegin(_etype255, _size252); - this->page_locations.resize(_size252); - uint32_t _i256; - for (_i256 = 0; _i256 < _size252; ++_i256) + uint32_t _size279; + ::apache::thrift::protocol::TType _etype282; + xfer += iprot->readListBegin(_etype282, _size279); + this->page_locations.resize(_size279); + uint32_t _i283; + for (_i283 = 0; _i283 < _size279; ++_i283) { - xfer += this->page_locations[_i256].read(iprot); + xfer += this->page_locations[_i283].read(iprot); } xfer += iprot->readListEnd(); } @@ -3968,14 +4439,14 @@ uint32_t OffsetIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->unencoded_byte_array_data_bytes.clear(); - uint32_t _size257; - ::apache::thrift::protocol::TType _etype260; - xfer += iprot->readListBegin(_etype260, _size257); - this->unencoded_byte_array_data_bytes.resize(_size257); - uint32_t _i261; - for (_i261 = 0; _i261 < _size257; ++_i261) + uint32_t _size284; + ::apache::thrift::protocol::TType _etype287; + xfer += iprot->readListBegin(_etype287, _size284); + this->unencoded_byte_array_data_bytes.resize(_size284); + uint32_t _i288; + for (_i288 = 0; _i288 < _size284; ++_i288) { - xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i261]); + xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i288]); } xfer += iprot->readListEnd(); } @@ -4007,10 +4478,10 @@ uint32_t OffsetIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); - std::vector ::const_iterator _iter262; - for (_iter262 = this->page_locations.begin(); _iter262 != 
this->page_locations.end(); ++_iter262) + std::vector ::const_iterator _iter289; + for (_iter289 = this->page_locations.begin(); _iter289 != this->page_locations.end(); ++_iter289) { - xfer += (*_iter262).write(oprot); + xfer += (*_iter289).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4020,10 +4491,10 @@ uint32_t OffsetIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->unencoded_byte_array_data_bytes.size())); - std::vector ::const_iterator _iter263; - for (_iter263 = this->unencoded_byte_array_data_bytes.begin(); _iter263 != this->unencoded_byte_array_data_bytes.end(); ++_iter263) + std::vector ::const_iterator _iter290; + for (_iter290 = this->unencoded_byte_array_data_bytes.begin(); _iter290 != this->unencoded_byte_array_data_bytes.end(); ++_iter290) { - xfer += oprot->writeI64((*_iter263)); + xfer += oprot->writeI64((*_iter290)); } xfer += oprot->writeListEnd(); } @@ -4064,14 +4535,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->null_pages.clear(); - uint32_t _size268; - ::apache::thrift::protocol::TType _etype271; - xfer += iprot->readListBegin(_etype271, _size268); - this->null_pages.resize(_size268); - uint32_t _i272; - for (_i272 = 0; _i272 < _size268; ++_i272) + uint32_t _size295; + ::apache::thrift::protocol::TType _etype298; + xfer += iprot->readListBegin(_etype298, _size295); + this->null_pages.resize(_size295); + uint32_t _i299; + for (_i299 = 0; _i299 < _size295; ++_i299) { - xfer += iprot->readBool(this->null_pages[_i272]); + xfer += iprot->readBool(this->null_pages[_i299]); } xfer += iprot->readListEnd(); } @@ -4084,14 +4555,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->min_values.clear(); - uint32_t _size273; - 
::apache::thrift::protocol::TType _etype276; - xfer += iprot->readListBegin(_etype276, _size273); - this->min_values.resize(_size273); - uint32_t _i277; - for (_i277 = 0; _i277 < _size273; ++_i277) + uint32_t _size300; + ::apache::thrift::protocol::TType _etype303; + xfer += iprot->readListBegin(_etype303, _size300); + this->min_values.resize(_size300); + uint32_t _i304; + for (_i304 = 0; _i304 < _size300; ++_i304) { - xfer += iprot->readBinary(this->min_values[_i277]); + xfer += iprot->readBinary(this->min_values[_i304]); } xfer += iprot->readListEnd(); } @@ -4104,14 +4575,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->max_values.clear(); - uint32_t _size278; - ::apache::thrift::protocol::TType _etype281; - xfer += iprot->readListBegin(_etype281, _size278); - this->max_values.resize(_size278); - uint32_t _i282; - for (_i282 = 0; _i282 < _size278; ++_i282) + uint32_t _size305; + ::apache::thrift::protocol::TType _etype308; + xfer += iprot->readListBegin(_etype308, _size305); + this->max_values.resize(_size305); + uint32_t _i309; + for (_i309 = 0; _i309 < _size305; ++_i309) { - xfer += iprot->readBinary(this->max_values[_i282]); + xfer += iprot->readBinary(this->max_values[_i309]); } xfer += iprot->readListEnd(); } @@ -4122,9 +4593,9 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { break; case 4: if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast283; - xfer += iprot->readI32(ecast283); - this->boundary_order = static_cast(ecast283); + int32_t ecast310; + xfer += iprot->readI32(ecast310); + this->boundary_order = static_cast(ecast310); isset_boundary_order = true; } else { xfer += iprot->skip(ftype); @@ -4134,14 +4605,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->null_counts.clear(); - uint32_t _size284; - ::apache::thrift::protocol::TType _etype287; - xfer += iprot->readListBegin(_etype287, _size284); - 
this->null_counts.resize(_size284); - uint32_t _i288; - for (_i288 = 0; _i288 < _size284; ++_i288) + uint32_t _size311; + ::apache::thrift::protocol::TType _etype314; + xfer += iprot->readListBegin(_etype314, _size311); + this->null_counts.resize(_size311); + uint32_t _i315; + for (_i315 = 0; _i315 < _size311; ++_i315) { - xfer += iprot->readI64(this->null_counts[_i288]); + xfer += iprot->readI64(this->null_counts[_i315]); } xfer += iprot->readListEnd(); } @@ -4154,14 +4625,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->repetition_level_histograms.clear(); - uint32_t _size289; - ::apache::thrift::protocol::TType _etype292; - xfer += iprot->readListBegin(_etype292, _size289); - this->repetition_level_histograms.resize(_size289); - uint32_t _i293; - for (_i293 = 0; _i293 < _size289; ++_i293) + uint32_t _size316; + ::apache::thrift::protocol::TType _etype319; + xfer += iprot->readListBegin(_etype319, _size316); + this->repetition_level_histograms.resize(_size316); + uint32_t _i320; + for (_i320 = 0; _i320 < _size316; ++_i320) { - xfer += iprot->readI64(this->repetition_level_histograms[_i293]); + xfer += iprot->readI64(this->repetition_level_histograms[_i320]); } xfer += iprot->readListEnd(); } @@ -4174,14 +4645,14 @@ uint32_t ColumnIndex::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->definition_level_histograms.clear(); - uint32_t _size294; - ::apache::thrift::protocol::TType _etype297; - xfer += iprot->readListBegin(_etype297, _size294); - this->definition_level_histograms.resize(_size294); - uint32_t _i298; - for (_i298 = 0; _i298 < _size294; ++_i298) + uint32_t _size321; + ::apache::thrift::protocol::TType _etype324; + xfer += iprot->readListBegin(_etype324, _size321); + this->definition_level_histograms.resize(_size321); + uint32_t _i325; + for (_i325 = 0; _i325 < _size321; ++_i325) { - xfer += iprot->readI64(this->definition_level_histograms[_i298]); + xfer += 
iprot->readI64(this->definition_level_histograms[_i325]); } xfer += iprot->readListEnd(); } @@ -4219,10 +4690,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); - std::vector ::const_iterator _iter299; - for (_iter299 = this->null_pages.begin(); _iter299 != this->null_pages.end(); ++_iter299) + std::vector ::const_iterator _iter326; + for (_iter326 = this->null_pages.begin(); _iter326 != this->null_pages.end(); ++_iter326) { - xfer += oprot->writeBool((*_iter299)); + xfer += oprot->writeBool((*_iter326)); } xfer += oprot->writeListEnd(); } @@ -4231,10 +4702,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); - std::vector ::const_iterator _iter300; - for (_iter300 = this->min_values.begin(); _iter300 != this->min_values.end(); ++_iter300) + std::vector ::const_iterator _iter327; + for (_iter327 = this->min_values.begin(); _iter327 != this->min_values.end(); ++_iter327) { - xfer += oprot->writeBinary((*_iter300)); + xfer += oprot->writeBinary((*_iter327)); } xfer += oprot->writeListEnd(); } @@ -4243,10 +4714,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); - std::vector ::const_iterator _iter301; - for (_iter301 = this->max_values.begin(); _iter301 != this->max_values.end(); ++_iter301) + std::vector ::const_iterator _iter328; + for (_iter328 = this->max_values.begin(); _iter328 != this->max_values.end(); ++_iter328) { - xfer += oprot->writeBinary((*_iter301)); + xfer 
+= oprot->writeBinary((*_iter328)); } xfer += oprot->writeListEnd(); } @@ -4260,10 +4731,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); - std::vector ::const_iterator _iter302; - for (_iter302 = this->null_counts.begin(); _iter302 != this->null_counts.end(); ++_iter302) + std::vector ::const_iterator _iter329; + for (_iter329 = this->null_counts.begin(); _iter329 != this->null_counts.end(); ++_iter329) { - xfer += oprot->writeI64((*_iter302)); + xfer += oprot->writeI64((*_iter329)); } xfer += oprot->writeListEnd(); } @@ -4273,10 +4744,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("repetition_level_histograms", ::apache::thrift::protocol::T_LIST, 6); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histograms.size())); - std::vector ::const_iterator _iter303; - for (_iter303 = this->repetition_level_histograms.begin(); _iter303 != this->repetition_level_histograms.end(); ++_iter303) + std::vector ::const_iterator _iter330; + for (_iter330 = this->repetition_level_histograms.begin(); _iter330 != this->repetition_level_histograms.end(); ++_iter330) { - xfer += oprot->writeI64((*_iter303)); + xfer += oprot->writeI64((*_iter330)); } xfer += oprot->writeListEnd(); } @@ -4286,10 +4757,10 @@ uint32_t ColumnIndex::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("definition_level_histograms", ::apache::thrift::protocol::T_LIST, 7); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histograms.size())); - std::vector ::const_iterator _iter304; - for (_iter304 = this->definition_level_histograms.begin(); _iter304 != this->definition_level_histograms.end(); ++_iter304) + std::vector ::const_iterator _iter331; + for 
(_iter331 = this->definition_level_histograms.begin(); _iter331 != this->definition_level_histograms.end(); ++_iter331) { - xfer += oprot->writeI64((*_iter304)); + xfer += oprot->writeI64((*_iter331)); } xfer += oprot->writeListEnd(); } @@ -4577,14 +5048,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->schema.clear(); - uint32_t _size321; - ::apache::thrift::protocol::TType _etype324; - xfer += iprot->readListBegin(_etype324, _size321); - this->schema.resize(_size321); - uint32_t _i325; - for (_i325 = 0; _i325 < _size321; ++_i325) + uint32_t _size348; + ::apache::thrift::protocol::TType _etype351; + xfer += iprot->readListBegin(_etype351, _size348); + this->schema.resize(_size348); + uint32_t _i352; + for (_i352 = 0; _i352 < _size348; ++_i352) { - xfer += this->schema[_i325].read(iprot); + xfer += this->schema[_i352].read(iprot); } xfer += iprot->readListEnd(); } @@ -4605,14 +5076,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->row_groups.clear(); - uint32_t _size326; - ::apache::thrift::protocol::TType _etype329; - xfer += iprot->readListBegin(_etype329, _size326); - this->row_groups.resize(_size326); - uint32_t _i330; - for (_i330 = 0; _i330 < _size326; ++_i330) + uint32_t _size353; + ::apache::thrift::protocol::TType _etype356; + xfer += iprot->readListBegin(_etype356, _size353); + this->row_groups.resize(_size353); + uint32_t _i357; + for (_i357 = 0; _i357 < _size353; ++_i357) { - xfer += this->row_groups[_i330].read(iprot); + xfer += this->row_groups[_i357].read(iprot); } xfer += iprot->readListEnd(); } @@ -4625,14 +5096,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->key_value_metadata.clear(); - uint32_t _size331; - ::apache::thrift::protocol::TType _etype334; - xfer += iprot->readListBegin(_etype334, _size331); - this->key_value_metadata.resize(_size331); - uint32_t 
_i335; - for (_i335 = 0; _i335 < _size331; ++_i335) + uint32_t _size358; + ::apache::thrift::protocol::TType _etype361; + xfer += iprot->readListBegin(_etype361, _size358); + this->key_value_metadata.resize(_size358); + uint32_t _i362; + for (_i362 = 0; _i362 < _size358; ++_i362) { - xfer += this->key_value_metadata[_i335].read(iprot); + xfer += this->key_value_metadata[_i362].read(iprot); } xfer += iprot->readListEnd(); } @@ -4653,14 +5124,14 @@ uint32_t FileMetaData::read(Protocol_* iprot) { if (ftype == ::apache::thrift::protocol::T_LIST) { { this->column_orders.clear(); - uint32_t _size336; - ::apache::thrift::protocol::TType _etype339; - xfer += iprot->readListBegin(_etype339, _size336); - this->column_orders.resize(_size336); - uint32_t _i340; - for (_i340 = 0; _i340 < _size336; ++_i340) + uint32_t _size363; + ::apache::thrift::protocol::TType _etype366; + xfer += iprot->readListBegin(_etype366, _size363); + this->column_orders.resize(_size363); + uint32_t _i367; + for (_i367 = 0; _i367 < _size363; ++_i367) { - xfer += this->column_orders[_i340].read(iprot); + xfer += this->column_orders[_i367].read(iprot); } xfer += iprot->readListEnd(); } @@ -4718,10 +5189,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - std::vector ::const_iterator _iter341; - for (_iter341 = this->schema.begin(); _iter341 != this->schema.end(); ++_iter341) + std::vector ::const_iterator _iter368; + for (_iter368 = this->schema.begin(); _iter368 != this->schema.end(); ++_iter368) { - xfer += (*_iter341).write(oprot); + xfer += (*_iter368).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4734,10 +5205,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); { xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - std::vector ::const_iterator _iter342; - for (_iter342 = this->row_groups.begin(); _iter342 != this->row_groups.end(); ++_iter342) + std::vector ::const_iterator _iter369; + for (_iter369 = this->row_groups.begin(); _iter369 != this->row_groups.end(); ++_iter369) { - xfer += (*_iter342).write(oprot); + xfer += (*_iter369).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4747,10 +5218,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter343; - for (_iter343 = this->key_value_metadata.begin(); _iter343 != this->key_value_metadata.end(); ++_iter343) + std::vector ::const_iterator _iter370; + for (_iter370 = this->key_value_metadata.begin(); _iter370 != this->key_value_metadata.end(); ++_iter370) { - xfer += (*_iter343).write(oprot); + xfer += (*_iter370).write(oprot); } xfer += oprot->writeListEnd(); } @@ -4765,10 +5236,10 @@ uint32_t FileMetaData::write(Protocol_* oprot) const { xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); { xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); - std::vector ::const_iterator _iter344; - for (_iter344 = this->column_orders.begin(); _iter344 != this->column_orders.end(); ++_iter344) + std::vector ::const_iterator _iter371; + for (_iter371 = this->column_orders.begin(); _iter371 != this->column_orders.end(); ++_iter371) { - xfer += (*_iter344).write(oprot); + xfer += (*_iter371).write(oprot); } xfer += oprot->writeListEnd(); } diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 83eb522484ba0..35f61aa072cfd 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ 
b/cpp/src/parquet/CMakeLists.txt @@ -171,6 +171,8 @@ set(PARQUET_SRCS exception.cc file_reader.cc file_writer.cc + geometry_statistics.cc + geometry_util_internal.cc level_comparison.cc level_conversion.cc metadata.cc @@ -259,6 +261,10 @@ endif() if(NOT PARQUET_MINIMAL_DEPENDENCY) list(APPEND PARQUET_SHARED_LINK_LIBS arrow_shared) + # TODO(paleolimbot): Remove once sample files are generated + list(APPEND PARQUET_SHARED_LINK_LIBS RapidJSON) + list(APPEND PARQUET_STATIC_LINK_LIBS RapidJSON) + # These are libraries that we will link privately with parquet_shared (as they # do not need to be linked transitively by other linkers) list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS thrift::thrift) @@ -372,6 +378,8 @@ add_parquet_test(internals-test statistics_test.cc encoding_test.cc metadata_test.cc + geometry_statistics_test.cc + geometry_util_internal_test.cc page_index_test.cc public_api_test.cc size_statistics_test.cc diff --git a/cpp/src/parquet/api/reader.h b/cpp/src/parquet/api/reader.h index 7e746e8c5bbf5..a65d707304386 100644 --- a/cpp/src/parquet/api/reader.h +++ b/cpp/src/parquet/api/reader.h @@ -22,6 +22,7 @@ #include "parquet/column_scanner.h" #include "parquet/exception.h" #include "parquet/file_reader.h" +#include "parquet/geometry_statistics.h" #include "parquet/metadata.h" #include "parquet/platform.h" #include "parquet/printer.h" diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index 27cb849365ca7..c84b9225b8ca7 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -43,6 +43,7 @@ #include "arrow/scalar.h" #include "arrow/table.h" #include "arrow/testing/builder.h" +#include "arrow/testing/extension_type.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/testing/util.h" @@ -1481,6 +1482,56 @@ TEST_F(TestJsonParquetIO, JsonExtension) { this->RoundTripSingleColumn(json_large_array, 
json_large_array, writer_properties); } +using TestGeoArrowParquetIO = TestParquetIO; + +TEST_F(TestGeoArrowParquetIO, GeoArrowExtension) { + ::arrow::ExtensionTypeGuard guard(test::geoarrow_wkb()); + + // Build a binary WKB array with at least one null value + ::arrow::BinaryBuilder builder; + std::array item; + for (int k = 0; k < 10; k++) { + test::GenerateWKBPoint(reinterpret_cast(item.data()), k, k + 1); + ASSERT_OK(builder.AppendValues({std::string(item.data(), item.size())})); + } + ASSERT_OK(builder.AppendNull()); + for (int k = 0; k < 5; k++) { + test::GenerateWKBPoint(reinterpret_cast(item.data()), k, k + 1); + ASSERT_OK(builder.AppendValues({std::string(item.data(), item.size())})); + } + + ASSERT_OK_AND_ASSIGN(const auto binary_array, builder.Finish()); + const auto wkb_type = test::geoarrow_wkb_lonlat(); + const auto wkb_array = ::arrow::ExtensionType::WrapArray(wkb_type, binary_array); + + const auto large_wkb_type = test::geoarrow_wkb_lonlat(::arrow::large_binary()); + ASSERT_OK_AND_ASSIGN(const auto large_binary_array, + ::arrow::compute::Cast(binary_array, ::arrow::large_binary())); + const auto large_wkb_array = + ::arrow::ExtensionType::WrapArray(large_wkb_type, large_binary_array.make_array()); + + // When the original Arrow schema isn't stored and Arrow extensions are disabled, + // LogicalType::GEOMETRY is read as binary. + auto writer_properties = ::parquet::ArrowWriterProperties::Builder() + .write_geospatial_logical_types() + ->build(); + this->RoundTripSingleColumn(wkb_array, binary_array, writer_properties); + this->RoundTripSingleColumn(large_wkb_array, binary_array, writer_properties); + + // When the original Arrow schema isn't stored and Arrow extensions are enabled, + // LogicalType::GEOMETRY is read as geoarrow.wkb with binary storage.
+ ::parquet::ArrowReaderProperties reader_properties; + reader_properties.set_arrow_extensions_enabled(true); + this->RoundTripSingleColumn(wkb_array, wkb_array, writer_properties, reader_properties); + this->RoundTripSingleColumn(large_wkb_array, wkb_array, writer_properties, + reader_properties); + + // When the original Arrow schema is stored, the stored Arrow type is respected. + writer_properties = ::parquet::ArrowWriterProperties::Builder().store_schema()->build(); + this->RoundTripSingleColumn(wkb_array, wkb_array, writer_properties); + this->RoundTripSingleColumn(large_wkb_array, large_wkb_array, writer_properties); +} + using TestNullParquetIO = TestParquetIO<::arrow::NullType>; TEST_F(TestNullParquetIO, NullColumn) { diff --git a/cpp/src/parquet/arrow/arrow_schema_test.cc b/cpp/src/parquet/arrow/arrow_schema_test.cc index 535efa0c8e5de..7da98cc0dae34 100644 --- a/cpp/src/parquet/arrow/arrow_schema_test.cc +++ b/cpp/src/parquet/arrow/arrow_schema_test.cc @@ -34,6 +34,7 @@ #include "arrow/array.h" #include "arrow/extension/json.h" #include "arrow/ipc/writer.h" +#include "arrow/testing/extension_type.h" #include "arrow/testing/gtest_util.h" #include "arrow/type.h" #include "arrow/util/base64.h" @@ -236,6 +237,10 @@ TEST_F(TestConvertParquetSchema, ParquetAnnotatedFields) { ::arrow::int64()}, {"json", LogicalType::JSON(), ParquetType::BYTE_ARRAY, -1, ::arrow::utf8()}, {"bson", LogicalType::BSON(), ParquetType::BYTE_ARRAY, -1, ::arrow::binary()}, + {"geometry", LogicalType::Geometry(), ParquetType::BYTE_ARRAY, -1, + ::arrow::binary()}, + {"geography", LogicalType::Geography(), ParquetType::BYTE_ARRAY, -1, + ::arrow::binary()}, {"interval", LogicalType::Interval(), ParquetType::FIXED_LEN_BYTE_ARRAY, 12, ::arrow::fixed_size_binary(12)}, {"uuid", LogicalType::UUID(), ParquetType::FIXED_LEN_BYTE_ARRAY, 16, @@ -948,6 +953,49 @@ TEST_F(TestConvertParquetSchema, ParquetSchemaArrowExtensions) { } } +TEST_F(TestConvertParquetSchema, ParquetSchemaGeoArrowExtensions) { + 
std::vector parquet_fields; + parquet_fields.push_back(PrimitiveNode::Make("geometry", Repetition::OPTIONAL, + LogicalType::Geometry(), + ParquetType::BYTE_ARRAY)); + parquet_fields.push_back(PrimitiveNode::Make("geography", Repetition::OPTIONAL, + LogicalType::Geography(), + ParquetType::BYTE_ARRAY)); + + { + // Parquet file does not contain Arrow schema. + // By default, both fields should be treated as binary() fields in Arrow. + auto arrow_schema = ::arrow::schema({::arrow::field("geometry", BINARY, true), + ::arrow::field("geography", BINARY, true)}); + std::shared_ptr metadata{}; + ASSERT_OK(ConvertSchema(parquet_fields, metadata)); + CheckFlatSchema(arrow_schema); + } + + { + // Parquet file does not contain Arrow schema. + // If Arrow extensions are enabled and extensions are registered, + // fields will be interpreted as geoarrow_wkb(binary()) extension fields. + ::arrow::ExtensionTypeGuard guard(test::geoarrow_wkb()); + + ArrowReaderProperties props; + props.set_arrow_extensions_enabled(true); + auto arrow_schema = ::arrow::schema( + {::arrow::field( + "geometry", + test::geoarrow_wkb(R"({"crs": "OGC:CRS84", "crs_type": "authority_code"})"), + true), + ::arrow::field( + "geography", + test::geoarrow_wkb( + R"({"crs": "OGC:CRS84", "crs_type": "authority_code", "edges": "spherical"})"), + true)}); + std::shared_ptr metadata{}; + ASSERT_OK(ConvertSchema(parquet_fields, metadata, props)); + CheckFlatSchema(arrow_schema); + } +} + class TestConvertArrowSchema : public ::testing::Test { public: virtual void SetUp() {} @@ -963,7 +1011,8 @@ class TestConvertArrowSchema : public ::testing::Test { for (int i = 0; i < expected_schema_node->field_count(); i++) { auto lhs = result_schema_node->field(i); auto rhs = expected_schema_node->field(i); - EXPECT_TRUE(lhs->Equals(rhs.get())); + EXPECT_TRUE(lhs->Equals(rhs.get())) + << lhs->logical_type()->ToString() << " != " << rhs->logical_type()->ToString(); } } @@ -1201,6 +1250,111 @@ TEST_F(TestConvertArrowSchema, 
ParquetFlatPrimitivesAsDictionaries) { ASSERT_NO_FATAL_FAILURE(CheckFlatSchema(parquet_fields)); } +TEST_F(TestConvertArrowSchema, ParquetGeoArrowCrsLonLat) { + // All the Arrow Schemas below should convert to the type defaults for GEOMETRY + // and GEOGRAPHY when GeoArrow extension types are registered and the appropriate + // writer option is set. + ::arrow::ExtensionTypeGuard guard(test::geoarrow_wkb()); + + ArrowWriterProperties::Builder builder; + builder.write_geospatial_logical_types(); + auto arrow_properties = builder.build(); + + std::vector parquet_fields; + parquet_fields.push_back(PrimitiveNode::Make("geometry", Repetition::OPTIONAL, + LogicalType::Geometry(), + ParquetType::BYTE_ARRAY)); + parquet_fields.push_back(PrimitiveNode::Make("geography", Repetition::OPTIONAL, + LogicalType::Geography(), + ParquetType::BYTE_ARRAY)); + + // There are several ways that longitude/latitude could be specified when coming from + // GeoArrow, which allows null, missing, arbitrary strings (e.g., Authority:Code), and + // PROJJSON. 
+ std::vector geoarrow_lonlat = { + "null", R"("OGC:CRS84")", R"("EPSG:4326")", + // Purely the parts of the PROJJSON that we inspect to check the lon/lat case + R"({"id": {"authority": "OGC", "code": "CRS84"}})", + R"({"id": {"authority": "EPSG", "code": 4326}})"}; + + std::string geoarrow_lonlatish_crs = geoarrow_lonlat[0]; + for (const auto& geoarrow_lonlatish_crs : geoarrow_lonlat) { + SCOPED_TRACE(geoarrow_lonlatish_crs); + std::vector> arrow_fields = { + ::arrow::field("geometry", + test::geoarrow_wkb(R"({"crs": )" + geoarrow_lonlatish_crs + "}"), + true), + ::arrow::field("geography", + test::geoarrow_wkb(R"({"crs": )" + geoarrow_lonlatish_crs + + R"(, "edges": "spherical"})"), + true)}; + + ASSERT_OK(ConvertSchema(arrow_fields, arrow_properties)); + ASSERT_NO_FATAL_FAILURE(CheckFlatSchema(parquet_fields)); + } +} + +TEST_F(TestConvertArrowSchema, ParquetGeoArrowCrsSrid) { + // Checks the conversion between GeoArrow's crs_type: srid and Parquet's srid:XXX. + // SRID (spatial reference identifier) is an opaque application specific identifier + // that GeoArrow will transport but refuse to resolve if required for a spatial + // operation. 
+ ::arrow::ExtensionTypeGuard guard(test::geoarrow_wkb()); + + ArrowWriterProperties::Builder builder; + builder.write_geospatial_logical_types(); + auto arrow_properties = builder.build(); + + std::vector parquet_fields; + parquet_fields.push_back(PrimitiveNode::Make("geometry", Repetition::OPTIONAL, + LogicalType::Geometry("srid:1234"), + ParquetType::BYTE_ARRAY)); + parquet_fields.push_back(PrimitiveNode::Make("geography", Repetition::OPTIONAL, + LogicalType::Geography("srid:5678"), + ParquetType::BYTE_ARRAY)); + + std::vector> arrow_fields = { + ::arrow::field("geometry", + test::geoarrow_wkb(R"({"crs": "1234", "crs_type": "srid"})"), true), + ::arrow::field("geography", + test::geoarrow_wkb( + R"({"crs": "5678", "crs_type": "srid", "edges": "spherical"})"), + true)}; + + ASSERT_OK(ConvertSchema(arrow_fields, arrow_properties)); + ASSERT_NO_FATAL_FAILURE(CheckFlatSchema(parquet_fields)); +} + +TEST_F(TestConvertArrowSchema, ParquetGeoArrowCrsProjjson) { + // Checks the conversion from GeoArrow that contains non-lon/lat PROJJSON + // to Parquet. Almost all GeoArrow types that arrive at the Parquet writer + // will have their CRS expressed in this way. 
+ ::arrow::ExtensionTypeGuard guard(test::geoarrow_wkb()); + + ArrowWriterProperties::Builder builder; + builder.write_geospatial_logical_types(); + auto arrow_properties = builder.build(); + + std::vector parquet_fields; + parquet_fields.push_back(PrimitiveNode::Make("geometry", Repetition::OPTIONAL, + LogicalType::Geometry("projjson:{}"), + ParquetType::BYTE_ARRAY)); + parquet_fields.push_back(PrimitiveNode::Make("geography", Repetition::OPTIONAL, + LogicalType::Geography("projjson:{}"), + ParquetType::BYTE_ARRAY)); + + std::vector> arrow_fields = { + ::arrow::field("geometry", + test::geoarrow_wkb(R"({"crs": {}, "crs_type": "projjson"})"), true), + ::arrow::field("geography", + test::geoarrow_wkb( + R"({"crs": {}, "crs_type": "projjson", "edges": "spherical"})"), + true)}; + + ASSERT_OK(ConvertSchema(arrow_fields, arrow_properties)); + ASSERT_NO_FATAL_FAILURE(CheckFlatSchema(parquet_fields)); +} + TEST_F(TestConvertArrowSchema, ParquetLists) { std::vector parquet_fields; std::vector> arrow_fields; diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index d94c73452c44d..3b98de12a73fa 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -21,6 +21,12 @@ #include #include +// TODO(paleolimbot): Remove once example files are generated +#include "arrow/json/rapidjson_defs.h" // IWYU pragma: keep + +#include +#include + #include "arrow/extension/json.h" #include "arrow/extension_type.h" #include "arrow/io/memory.h" @@ -243,6 +249,92 @@ static Status GetTimestampMetadata(const ::arrow::TimestampType& type, return Status::OK(); } +// TODO(paleolimbot): Remove once example files are written +Result GeospatialGeoArrowCrsToParquetCrs( + const ::arrow::rapidjson::Document& document, + const ArrowWriterProperties& arrow_properties) { + namespace rj = ::arrow::rapidjson; + + std::string crs_type; + if (document.HasMember("crs_type")) { + crs_type = document["crs_type"].GetString(); + } + + if 
(!document.HasMember("crs") || document["crs"].IsNull()) { + // Parquet GEOMETRY/GEOGRAPHY do not have a concept of a null/missing + // CRS, but an omitted one is more likely to have meant "lon/lat" than + // a truly unspecified one (i.e., Engineering CRS with arbitrary XY units) + return ""; + } + + const auto& json_crs = document["crs"]; + if (json_crs.IsString() && crs_type == "srid") { + // srid is an application-specific identifier. GeoArrow lets this be propagated via + // "crs_type": "srid". + return std::string("srid:") + json_crs.GetString(); + } else if (json_crs.IsString() && + (json_crs == "EPSG:4326" || json_crs == "OGC:CRS84")) { + // crs can be left empty because these cases both correspond to + // longitude/latitude in WGS84 according to the Parquet specification + return ""; + } else if (json_crs.IsObject()) { + if (json_crs.HasMember("id")) { + const auto& identifier = json_crs["id"]; + if (identifier.HasMember("authority") && identifier.HasMember("code")) { + if (identifier["authority"] == "OGC" && identifier["code"] == "CRS84") { + // longitude/latitude + return ""; + } else if (identifier["authority"] == "EPSG" && identifier["code"] == 4326) { + // longitude/latitude + return ""; + } + } + } + + // TODO(paleolimbot) this is not quite correct because we're supposed to put this + // in the metadata according to the spec. I can't find a good way to get a mutable + // reference to the global metadata here yet. + rj::StringBuffer buffer; + rj::Writer writer(buffer); + json_crs.Accept(writer); + return std::string("projjson:") + buffer.GetString(); + } else { + // e.g., authority:code, WKT2, arbitrary string. A pluggable CrsProvider + // could handle these and return something we're allowed to write here. 
+ return Status::Invalid("Unsupported GeoArrow CRS for Parquet"); + } +} + +Result> GeospatialLogicalTypeFromArrow( + const std::string& serialized_data, const ArrowWriterProperties& arrow_properties) { + // Parquet has no way to interpret a null or missing CRS; however, it is more likely + // to induce confusion to insert the fully specified equivalent of a null CRS (custom + // engineering CRS with unspecified units) + if (serialized_data.empty() || serialized_data == "{}") { + return LogicalType::Geometry(); + } + + namespace rj = ::arrow::rapidjson; + rj::Document document; + if (document.Parse(serialized_data.data(), serialized_data.length()).HasParseError()) { + return Status::Invalid("Invalid serialized JSON data: ", serialized_data); + } + + ARROW_ASSIGN_OR_RAISE(std::string crs, + GeospatialGeoArrowCrsToParquetCrs(document, arrow_properties)); + + if (document.HasMember("edges") && document["edges"] == "planar") { + return LogicalType::Geometry(crs); + } else if (document.HasMember("edges") && document["edges"] == "spherical") { + return LogicalType::Geography(crs, + LogicalType::EdgeInterpolationAlgorithm::SPHERICAL); + } else if (document.HasMember("edges")) { + return Status::NotImplemented("GeoArrow edge type: ", serialized_data); + } + + return LogicalType::Geometry(crs); +} + static constexpr char FIELD_ID_KEY[] = "PARQUET:field_id"; std::shared_ptr<::arrow::KeyValueMetadata> FieldIdMetadata(int field_id) { @@ -267,8 +359,8 @@ int FieldIdFromMetadata( if (::arrow::internal::ParseValue<::arrow::Int32Type>( field_id_str.c_str(), field_id_str.length(), &field_id)) { if (field_id < 0) { - // Thrift should convert any negative value to null but normalize to -1 here in case - // we later check this in logic. + // Thrift should convert any negative value to null but normalize to -1 here in + // case we later check this in logic. 
return -1; } return field_id; @@ -428,13 +520,20 @@ Status FieldToNode(const std::string& name, const std::shared_ptr& field, } case ArrowTypeId::EXTENSION: { auto ext_type = std::static_pointer_cast<::arrow::ExtensionType>(field->type()); - // Built-in JSON extension is handled differently. + // Built-in JSON extension and GeoArrow are handled differently. if (ext_type->extension_name() == std::string("arrow.json")) { // Set physical and logical types and instantiate primitive node. type = ParquetType::BYTE_ARRAY; logical_type = LogicalType::JSON(); break; + } else if (arrow_properties.write_geospatial_logical_types() && + ext_type->extension_name() == std::string("geoarrow.wkb")) { + type = ParquetType::BYTE_ARRAY; + ARROW_ASSIGN_OR_RAISE(logical_type, GeospatialLogicalTypeFromArrow( + ext_type->Serialize(), arrow_properties)); + break; } + std::shared_ptr<::arrow::Field> storage_field = ::arrow::field( name, ext_type->storage_type(), field->nullable(), field->metadata()); return FieldToNode(name, storage_field, properties, arrow_properties, out); @@ -463,6 +562,7 @@ struct SchemaTreeContext { SchemaManifest* manifest; ArrowReaderProperties properties; const SchemaDescriptor* schema; + std::shared_ptr metadata; void LinkParent(const SchemaField* child, const SchemaField* parent) { manifest->child_to_parent[child] = parent; @@ -485,7 +585,7 @@ ::arrow::Result> GetTypeForNode( int column_index, const schema::PrimitiveNode& primitive_node, SchemaTreeContext* ctx) { ARROW_ASSIGN_OR_RAISE(std::shared_ptr storage_type, - GetArrowType(primitive_node, ctx->properties)); + GetArrowType(primitive_node, ctx->properties, ctx->metadata)); if (ctx->properties.read_dictionary(column_index) && IsDictionaryReadSupported(*storage_type)) { return ::arrow::dictionary(::arrow::int32(), storage_type); @@ -578,8 +678,8 @@ Status MapToSchemaField(const GroupNode& group, LevelInfo current_levels, return Status::Invalid("Map keys must be annotated as required."); } // Arrow doesn't 
support 1 column maps (i.e. Sets). The options are to either - // make the values column nullable, or process the map as a list. We choose the latter - // as it is simpler. + // make the values column nullable, or process the map as a list. We choose the + // latter as it is simpler. if (key_value.field_count() == 1) { return ListToSchemaField(group, current_levels, ctx, parent, out); } @@ -1150,6 +1250,7 @@ Status SchemaManifest::Make(const SchemaDescriptor* schema, ctx.manifest = manifest; ctx.properties = properties; ctx.schema = schema; + ctx.metadata = metadata; const GroupNode& schema_node = *schema->group_node(); manifest->descr = schema; manifest->schema_fields.resize(schema_node.field_count()); diff --git a/cpp/src/parquet/arrow/schema_internal.cc b/cpp/src/parquet/arrow/schema_internal.cc index 261a00940654d..e07101e2048b0 100644 --- a/cpp/src/parquet/arrow/schema_internal.cc +++ b/cpp/src/parquet/arrow/schema_internal.cc @@ -20,6 +20,7 @@ #include "arrow/extension/json.h" #include "arrow/type.h" +#include "arrow/util/key_value_metadata.h" #include "parquet/properties.h" using ArrowType = ::arrow::DataType; @@ -110,8 +111,62 @@ Result> MakeArrowTimestamp(const LogicalType& logical } } +Result MakeGeoArrowCrsMetadata( + const std::string& crs, + const std::shared_ptr& metadata) { + std::string srid_prefix{"srid:"}; + std::string projjson_prefix{"projjson:"}; + + if (crs.empty()) { + return R"("crs": "OGC:CRS84", "crs_type": "authority_code")"; + } else if (crs.rfind(srid_prefix, 0) == 0) { + return R"("crs": ")" + crs.substr(srid_prefix.size()) + R"(", "crs_type": "srid")"; + } else if (crs.rfind(projjson_prefix, 0) == 0) { + std::string metadata_field = crs.substr(projjson_prefix.size()); + if (metadata && metadata->Contains(metadata_field)) { + ARROW_ASSIGN_OR_RAISE(std::string projjson_value, metadata->Get(metadata_field)); + return R"("crs": )" + projjson_value + R"(, "crs_type": "projjson")"; + } else { + // Pass on the value of the field so the user 
can sort this out if needed + return R"("crs": )" + metadata_field + R"(, "crs_type": "projjson")"; + } + } else { + return Status::Invalid("Can't convert invalid Parquet CRS string to GeoArrow: ", crs); + } +} + +Result> MakeGeoArrowGeometryType( + const LogicalType& logical_type, + const std::shared_ptr& metadata) { + // Check if we have a registered GeoArrow type to read into + std::shared_ptr<::arrow::ExtensionType> maybe_geoarrow_wkb = + ::arrow::GetExtensionType("geoarrow.wkb"); + if (!maybe_geoarrow_wkb) { + return ::arrow::binary(); + } + + if (logical_type.is_geometry()) { + const auto& geospatial_type = checked_cast(logical_type); + ARROW_ASSIGN_OR_RAISE(std::string crs_metadata, + MakeGeoArrowCrsMetadata(geospatial_type.crs(), metadata)); + + std::string serialized_data = std::string("{") + crs_metadata + "}"; + return maybe_geoarrow_wkb->Deserialize(::arrow::binary(), serialized_data); + } else { + const auto& geospatial_type = checked_cast(logical_type); + ARROW_ASSIGN_OR_RAISE(std::string crs_metadata, + MakeGeoArrowCrsMetadata(geospatial_type.crs(), metadata)); + std::string edges_metadata = + R"("edges": ")" + std::string(geospatial_type.algorithm_name()) + R"(")"; + std::string serialized_data = + std::string("{") + crs_metadata + ", " + edges_metadata + "}"; + return maybe_geoarrow_wkb->Deserialize(::arrow::binary(), serialized_data); + } +} + Result> FromByteArray( - const LogicalType& logical_type, const ArrowReaderProperties& reader_properties) { + const LogicalType& logical_type, const ArrowReaderProperties& reader_properties, + const std::shared_ptr& metadata) { switch (logical_type.type()) { case LogicalType::Type::STRING: return ::arrow::utf8(); @@ -128,6 +183,18 @@ Result> FromByteArray( // When the original Arrow schema isn't stored and Arrow extensions are disabled, // LogicalType::JSON is read as utf8(). 
return ::arrow::utf8(); + case LogicalType::Type::GEOMETRY: + case LogicalType::Type::GEOGRAPHY: + if (reader_properties.get_arrow_extensions_enabled()) { + // Attempt creating a GeoArrow extension type (or return binary() if types are not + // registered) + return MakeGeoArrowGeometryType(logical_type, metadata); + } + + // When the original Arrow schema isn't stored, Arrow extensions are disabled, or + // the geoarrow.wkb extension type isn't registered, LogicalType::GEOMETRY and + // LogicalType::GEOGRAPHY are read as binary(). + return ::arrow::binary(); default: return Status::NotImplemented("Unhandled logical logical_type ", logical_type.ToString(), " for binary array"); @@ -190,7 +257,8 @@ Result> FromInt64(const LogicalType& logical_type) { Result> GetArrowType( Type::type physical_type, const LogicalType& logical_type, int type_length, - const ArrowReaderProperties& reader_properties) { + const ArrowReaderProperties& reader_properties, + const std::shared_ptr& metadata) { if (logical_type.is_invalid() || logical_type.is_null()) { return ::arrow::null(); } @@ -209,7 +277,7 @@ Result> GetArrowType( case ParquetType::DOUBLE: return ::arrow::float64(); case ParquetType::BYTE_ARRAY: - return FromByteArray(logical_type, reader_properties); + return FromByteArray(logical_type, reader_properties, metadata); case ParquetType::FIXED_LEN_BYTE_ARRAY: return FromFLBA(logical_type, type_length); default: { @@ -222,9 +290,10 @@ Result> GetArrowType( Result> GetArrowType( const schema::PrimitiveNode& primitive, - const ArrowReaderProperties& reader_properties) { + const ArrowReaderProperties& reader_properties, + const std::shared_ptr& metadata) { return GetArrowType(primitive.physical_type(), *primitive.logical_type(), - primitive.type_length(), reader_properties); + primitive.type_length(), reader_properties, metadata); } } // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/schema_internal.h b/cpp/src/parquet/arrow/schema_internal.h index 
58828f85ab8e3..da6bd52cc281b 100644 --- a/cpp/src/parquet/arrow/schema_internal.h +++ b/cpp/src/parquet/arrow/schema_internal.h @@ -29,8 +29,9 @@ namespace parquet::arrow { using ::arrow::Result; -Result> FromByteArray(const LogicalType& logical_type, - bool use_known_arrow_extensions); +Result> FromByteArray( + const LogicalType& logical_type, bool use_known_arrow_extensions, + const std::shared_ptr& metadata = nullptr); Result> FromFLBA(const LogicalType& logical_type, int32_t physical_length); Result> FromInt32(const LogicalType& logical_type); @@ -38,10 +39,12 @@ Result> FromInt64(const LogicalType& logical_ Result> GetArrowType( Type::type physical_type, const LogicalType& logical_type, int type_length, - const ArrowReaderProperties& reader_properties); + const ArrowReaderProperties& reader_properties, + const std::shared_ptr& metadata = nullptr); Result> GetArrowType( const schema::PrimitiveNode& primitive, - const ArrowReaderProperties& reader_properties); + const ArrowReaderProperties& reader_properties, + const std::shared_ptr& metadata = nullptr); } // namespace parquet::arrow diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index 4998e6f301a00..e85400782e654 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -788,6 +788,9 @@ class ColumnWriterImpl { // Plain-encoded statistics of the whole chunk virtual StatisticsPair GetChunkStatistics() = 0; + // Plain-encoded geometry statistics of the whole chunk + virtual EncodedGeospatialStatistics GetChunkGeospatialStatistics() = 0; + // Merges page statistics into chunk statistics, then resets the values virtual void ResetPageStatistics() = 0; @@ -1102,9 +1105,18 @@ int64_t ColumnWriterImpl::Close() { if (rows_written_ > 0 && chunk_statistics.is_set()) { metadata_->SetStatistics(chunk_statistics); } + if (rows_written_ > 0 && chunk_size_statistics.is_set()) { metadata_->SetSizeStatistics(chunk_size_statistics); } + + if (descr_->logical_type() 
!= nullptr && descr_->logical_type()->is_geometry()) { + EncodedGeospatialStatistics geometry_stats = GetChunkGeospatialStatistics(); + if (geometry_stats.is_set()) { + metadata_->SetGeospatialStatistics(geometry_stats); + } + } + metadata_->SetKeyValueMetadata(key_value_metadata_); pager_->Close(has_dictionary_, fallback_); } @@ -1225,10 +1237,14 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< // Will be null if not using dictionary, but that's ok current_dict_encoder_ = dynamic_cast*>(current_encoder_.get()); - if (properties->statistics_enabled(descr_->path()) && - (SortOrder::UNKNOWN != descr_->sort_order())) { - page_statistics_ = MakeStatistics(descr_, allocator_); - chunk_statistics_ = MakeStatistics(descr_, allocator_); + if (properties->statistics_enabled(descr_->path())) { + if (SortOrder::UNKNOWN != descr_->sort_order()) { + page_statistics_ = MakeStatistics(descr_, allocator_); + chunk_statistics_ = MakeStatistics(descr_, allocator_); + } + if (descr_->logical_type() != nullptr && descr_->logical_type()->is_geometry()) { + chunk_geometry_statistics_ = std::make_shared(); + } } if (properties->size_statistics_level() == SizeStatisticsLevel::ColumnChunk || properties->size_statistics_level() == SizeStatisticsLevel::PageAndColumnChunk) { @@ -1396,6 +1412,12 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< return result; } + EncodedGeospatialStatistics GetChunkGeospatialStatistics() override { + EncodedGeospatialStatistics result; + if (chunk_geometry_statistics_) result = chunk_geometry_statistics_->Encode(); + return result; + } + void ResetPageStatistics() override { if (chunk_statistics_ != nullptr) { chunk_statistics_->Merge(*page_statistics_); @@ -1460,6 +1482,7 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< std::shared_ptr chunk_statistics_; std::unique_ptr page_size_statistics_; std::shared_ptr chunk_size_statistics_; + std::shared_ptr 
chunk_geometry_statistics_; bool pages_change_on_record_boundaries_; // If writing a sequence of ::arrow::DictionaryArray to the writer, we keep the @@ -1685,7 +1708,14 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< if (page_statistics_ != nullptr) { page_statistics_->Update(values, num_values, num_nulls); } + UpdateUnencodedDataBytes(); + + if constexpr (std::is_same::value) { + if (chunk_geometry_statistics_ != nullptr) { + chunk_geometry_statistics_->Update(values, num_values, num_nulls); + } + } } /// \brief Write values with spaces and update page statistics accordingly. @@ -1714,7 +1744,16 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter< page_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, num_spaced_values, num_values, num_nulls); } + UpdateUnencodedDataBytes(); + + if constexpr (std::is_same::value) { + if (chunk_geometry_statistics_ != nullptr) { + chunk_geometry_statistics_->UpdateSpaced(values, valid_bits, valid_bits_offset, + num_spaced_values, num_values, + num_nulls); + } + } } }; @@ -1792,6 +1831,12 @@ Status TypedColumnWriterImpl::WriteArrowDictionary( page_statistics_->IncrementNullCount(num_chunk_levels - non_null_count); page_statistics_->IncrementNumValues(non_null_count); page_statistics_->Update(*referenced_dictionary, /*update_counts=*/false); + + if constexpr (std::is_same::value) { + if (chunk_geometry_statistics_ != nullptr) { + chunk_geometry_statistics_->Update(*referenced_dictionary); + } + } }; int64_t value_offset = 0; @@ -2297,7 +2342,12 @@ Status TypedColumnWriterImpl::WriteArrowDense( page_statistics_->IncrementNullCount(batch_size - non_null); page_statistics_->IncrementNumValues(non_null); } + UpdateUnencodedDataBytes(); + + if (chunk_geometry_statistics_ != nullptr) { + chunk_geometry_statistics_->Update(*data_slice); + } CommitWriteAndCheckPageLimit(batch_size, batch_num_values, batch_size - non_null, check_page); 
CheckDictionarySizeLimit(); diff --git a/cpp/src/parquet/column_writer_test.cc b/cpp/src/parquet/column_writer_test.cc index 744859cf0f037..49ed583396b7f 100644 --- a/cpp/src/parquet/column_writer_test.cc +++ b/cpp/src/parquet/column_writer_test.cc @@ -399,6 +399,26 @@ class TestPrimitiveWriter : public PrimitiveTypedTest { return metadata_accessor->key_value_metadata(); } + std::unique_ptr metadata_accessor() { + // Metadata accessor must be created lazily. + // This is because the ColumnChunkMetaData semantics dictate the metadata object is + // complete (no changes to the metadata buffer can be made after instantiation) + return ColumnChunkMetaData::Make(metadata_->contents(), this->descr_); + } + + EncodedStatistics metadata_encoded_stats() { return metadata_stats()->Encode(); } + + std::shared_ptr metadata_stats() { + return metadata_accessor()->statistics(); + } + + std::shared_ptr metadata_geometry_stats() { + ApplicationVersion app_version(this->writer_properties_->created_by()); + auto metadata_accessor = ColumnChunkMetaData::Make( + metadata_->contents(), this->descr_, default_reader_properties(), &app_version); + return metadata_accessor->geometry_statistics(); + } + protected: int64_t values_read_; // Keep the reader alive as for ByteArray the lifetime of the ByteArray @@ -1807,5 +1827,164 @@ TEST_F(TestValuesWriterInt32Type, AllNullsCompressionInPageV2) { } } +// Test writing and reading geometry columns +class TestGeometryValuesWriter : public TestPrimitiveWriter { + public: + void SetUpSchema(Repetition::type repetition, int num_columns) override { + std::vector fields; + + for (int i = 0; i < num_columns; ++i) { + std::string name = TestColumnName(i); + std::shared_ptr logical_type = + GeometryLogicalType::Make("srid:1234"); + fields.push_back(schema::PrimitiveNode::Make(name, repetition, logical_type, + ByteArrayType::type_num)); + } + node_ = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields); + schema_.Init(node_); + } + + void 
GenerateData(int64_t num_values, uint32_t seed = 0) { + values_.resize(num_values); + + buffer_.resize(num_values * kWkbPointSize); + uint8_t* ptr = buffer_.data(); + for (int k = 0; k < num_values; k++) { + GenerateWKBPoint(ptr, k, k + 1); + values_[k].len = kWkbPointSize; + values_[k].ptr = ptr; + ptr += kWkbPointSize; + } + + values_ptr_ = values_.data(); + } + + void TestWriteAndRead(ParquetVersion::type version, + ParquetDataPageVersion data_page_version) { + this->SetUpSchema(Repetition::REQUIRED, 1); + this->GenerateData(SMALL_SIZE); + size_t num_values = this->values_.size(); + auto writer = + this->BuildWriter(num_values, ColumnProperties(), version, data_page_version, + /*enable_checksum*/ false); + writer->WriteBatch(this->values_.size(), nullptr, nullptr, this->values_.data()); + + writer->Close(); + this->ReadColumn(); + for (size_t i = 0; i < num_values; i++) { + const ByteArray& value = this->values_out_[i]; + double x = 0; + double y = 0; + EXPECT_TRUE(GetWKBPointCoordinate(value, &x, &y)); + auto expected_x = static_cast(i); + auto expected_y = static_cast(i + 1); + EXPECT_DOUBLE_EQ(expected_x, x); + EXPECT_DOUBLE_EQ(expected_y, y); + } + + std::shared_ptr geometry_statistics = metadata_geometry_stats(); + ASSERT_TRUE(geometry_statistics != nullptr); + std::vector geospatial_types = geometry_statistics->GetGeometryTypes(); + EXPECT_EQ(1, geospatial_types.size()); + EXPECT_EQ(1, geospatial_types[0]); + EXPECT_DOUBLE_EQ(0, geometry_statistics->GetXMin()); + EXPECT_DOUBLE_EQ(1, geometry_statistics->GetYMin()); + EXPECT_DOUBLE_EQ(99, geometry_statistics->GetXMax()); + EXPECT_DOUBLE_EQ(100, geometry_statistics->GetYMax()); + EXPECT_FALSE(geometry_statistics->HasZ()); + EXPECT_FALSE(geometry_statistics->HasM()); + } + + void TestWriteAndReadSpaced(ParquetVersion::type version, + ParquetDataPageVersion data_page_version) { + this->SetUpSchema(Repetition::OPTIONAL, 1); + this->GenerateData(SMALL_SIZE); + size_t num_values = this->values_.size(); + + 
std::vector definition_levels(num_values, 1); + std::vector repetition_levels(num_values, 0); + std::vector non_null_indices; + + // Replace some of the generated data with NULL + for (size_t i = 0; i < num_values; i++) { + if (i % 3 == 0) { + definition_levels[i] = 0; + } else { + non_null_indices.push_back(i); + } + } + + // Construct valid bits using definition levels + std::vector valid_bytes(num_values); + std::transform(definition_levels.begin(), definition_levels.end(), + valid_bytes.begin(), + [&](int64_t level) { return static_cast(level); }); + std::shared_ptr valid_bits; + ASSERT_OK_AND_ASSIGN(valid_bits, ::arrow::internal::BytesToBits(valid_bytes)); + + auto writer = + this->BuildWriter(num_values, ColumnProperties(), version, data_page_version, + /*enable_checksum*/ false); + writer->WriteBatchSpaced(this->values_.size(), definition_levels.data(), + repetition_levels.data(), valid_bits->data(), 0, + this->values_.data()); + + writer->Close(); + this->ReadColumn(); + size_t expected_values_read = non_null_indices.size(); + EXPECT_EQ(expected_values_read, values_read_); + for (int64_t i = 0; i < values_read_; i++) { + const ByteArray& value = this->values_out_[i]; + double x = 0; + double y = 0; + EXPECT_TRUE(GetWKBPointCoordinate(value, &x, &y)); + auto expected_x = static_cast(non_null_indices[i]); + auto expected_y = static_cast(non_null_indices[i] + 1); + EXPECT_DOUBLE_EQ(expected_x, x); + EXPECT_DOUBLE_EQ(expected_y, y); + } + + std::shared_ptr geometry_statistics = metadata_geometry_stats(); + ASSERT_TRUE(geometry_statistics != nullptr); + std::vector geospatial_types = geometry_statistics->GetGeometryTypes(); + EXPECT_EQ(1, geospatial_types.size()); + EXPECT_EQ(1, geospatial_types[0]); + EXPECT_DOUBLE_EQ(1, geometry_statistics->GetXMin()); + EXPECT_DOUBLE_EQ(2, geometry_statistics->GetYMin()); + EXPECT_DOUBLE_EQ(98, geometry_statistics->GetXMax()); + EXPECT_DOUBLE_EQ(99, geometry_statistics->GetYMax()); + EXPECT_FALSE(geometry_statistics->HasZ()); 
+ EXPECT_FALSE(geometry_statistics->HasM()); + } +}; + +TEST_F(TestGeometryValuesWriter, TestWriteAndReadV1) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndRead(ParquetVersion::PARQUET_1_0, data_page_version); + } +} + +TEST_F(TestGeometryValuesWriter, TestWriteAndReadV2) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndRead(ParquetVersion::PARQUET_2_4, data_page_version); + } +} + +TEST_F(TestGeometryValuesWriter, TestWriteAndReadV1Spaced) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndReadSpaced(ParquetVersion::PARQUET_1_0, data_page_version); + } +} + +TEST_F(TestGeometryValuesWriter, TestWriteAndReadV2Spaced) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndReadSpaced(ParquetVersion::PARQUET_2_4, data_page_version); + } +} + } // namespace test } // namespace parquet diff --git a/cpp/src/parquet/geometry_statistics.cc b/cpp/src/parquet/geometry_statistics.cc new file mode 100644 index 0000000000000..e58cbf3d9e44e --- /dev/null +++ b/cpp/src/parquet/geometry_statistics.cc @@ -0,0 +1,291 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "parquet/geometry_statistics.h" +#include + +#include "arrow/array.h" +#include "arrow/type.h" +#include "arrow/util/bit_run_reader.h" +#include "arrow/util/logging.h" +#include "parquet/exception.h" +#include "parquet/geometry_util_internal.h" + +using arrow::util::SafeLoad; + +namespace parquet { + +class GeospatialStatisticsImpl { + public: + GeospatialStatisticsImpl() = default; + GeospatialStatisticsImpl(const GeospatialStatisticsImpl&) = default; + + bool Equals(const GeospatialStatisticsImpl& other) const { + if (is_valid_ != other.is_valid_) { + return false; + } + + if (!is_valid_ && !other.is_valid_) { + return true; + } + + auto geospatial_types = bounder_.GeometryTypes(); + auto other_geospatial_types = other.bounder_.GeometryTypes(); + if (geospatial_types.size() != other_geospatial_types.size()) { + return false; + } + + for (size_t i = 0; i < geospatial_types.size(); i++) { + if (geospatial_types[i] != other_geospatial_types[i]) { + return false; + } + } + + return bounder_.Bounds() == other.bounder_.Bounds(); + } + + void Merge(const GeospatialStatisticsImpl& other) { + is_valid_ = is_valid_ && other.is_valid_; + bounder_.ReadBox(other.bounder_.Bounds()); + bounder_.ReadGeometryTypes(other.bounder_.GeometryTypes()); + } + + void Update(const ByteArray* values, int64_t num_values, int64_t null_count) { + if (!is_valid_) { + return; + } + + for (int64_t i = 0; i < num_values; i++) { + const ByteArray& item = values[i]; + ::arrow::Status status = bounder_.ReadGeometry(item.ptr, item.len); + if (!status.ok()) { + is_valid_ = false; + return; + } + } + } + + void UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits, + int64_t valid_bits_offset, int64_t num_spaced_values, + int64_t num_values, int64_t null_count) { + DCHECK_GT(num_spaced_values, 0); + + if (!is_valid_) { + return; + } + + ::arrow::Status status = 
::arrow::internal::VisitSetBitRuns( + valid_bits, valid_bits_offset, num_spaced_values, + [&](int64_t position, int64_t length) { + for (int64_t i = 0; i < length; i++) { + ByteArray item = SafeLoad(values + i + position); + ARROW_RETURN_NOT_OK(bounder_.ReadGeometry(item.ptr, item.len)); + } + + return ::arrow::Status::OK(); + }); + + if (!status.ok()) { + is_valid_ = false; + } + } + + void Update(const ::arrow::Array& values) { + if (!is_valid_) { + return; + } + + // Note that ::arrow::Type::EXTENSION seems to be handled before this is called + switch (values.type_id()) { + case ::arrow::Type::BINARY: + UpdateArrayImpl<::arrow::BinaryArray>(values); + break; + case ::arrow::Type::LARGE_BINARY: + UpdateArrayImpl<::arrow::LargeBinaryArray>(values); + break; + // This does not currently handle run-end encoded, dictionary encodings, or views + default: + throw ParquetException( + "Unsupported Array type in GeospatialStatistics::Update(Array): ", + values.type()->ToString()); + } + } + + void Reset() { + bounder_.Reset(); + is_valid_ = true; + } + + EncodedGeospatialStatistics Encode() const { + if (!is_valid_) { + return {}; + } + + const geometry::BoundingBox::XYZM& mins = bounder_.Bounds().min; + const geometry::BoundingBox::XYZM& maxes = bounder_.Bounds().max; + + EncodedGeospatialStatistics out; + out.geospatial_types = bounder_.GeometryTypes(); + + out.xmin = mins[0]; + out.xmax = maxes[0]; + out.ymin = mins[1]; + out.ymax = maxes[1]; + out.zmin = mins[2]; + out.zmax = maxes[2]; + out.mmin = mins[3]; + out.mmax = maxes[3]; + + return out; + } + + void Update(const EncodedGeospatialStatistics& encoded) { + if (!is_valid_) { + return; + } + + geometry::BoundingBox box; + box.min[0] = encoded.xmin; + box.max[0] = encoded.xmax; + box.min[1] = encoded.ymin; + box.max[1] = encoded.ymax; + + if (encoded.has_z()) { + box.min[2] = encoded.zmin; + box.max[2] = encoded.zmax; + } + + if (encoded.has_m()) { + box.min[3] = encoded.mmin; + box.max[3] = encoded.mmax; + } + + 
bounder_.ReadBox(box); + bounder_.ReadGeometryTypes(encoded.geospatial_types); + } + + bool is_valid() const { return is_valid_; } + + const std::array& GetMinBounds() const { return bounder_.Bounds().min; } + + const std::array& GetMaxBounds() { return bounder_.Bounds().max; } + + std::vector GetGeometryTypes() const { return bounder_.GeometryTypes(); } + + private: + geometry::WKBGeometryBounder bounder_; + bool is_valid_ = true; + + template + void UpdateArrayImpl(const ::arrow::Array& values) { + const auto& binary_array = static_cast(values); + for (int64_t i = 0; i < binary_array.length(); ++i) { + if (!binary_array.IsNull(i)) { + std::string_view byte_array = binary_array.GetView(i); + ::arrow::Status status = bounder_.ReadGeometry( + reinterpret_cast(byte_array.data()), byte_array.length()); + if (!status.ok()) { + is_valid_ = false; + return; + } + } + } + } +}; + +GeospatialStatistics::GeospatialStatistics() + : impl_(std::make_unique()) {} + +GeospatialStatistics::GeospatialStatistics(std::unique_ptr impl) + : impl_(std::move(impl)) {} + +GeospatialStatistics::GeospatialStatistics(const EncodedGeospatialStatistics& encoded) + : GeospatialStatistics() { + Decode(encoded); +} + +GeospatialStatistics::GeospatialStatistics(GeospatialStatistics&&) = default; + +GeospatialStatistics::~GeospatialStatistics() = default; + +bool GeospatialStatistics::Equals(const GeospatialStatistics& other) const { + return impl_->Equals(*other.impl_); +} + +void GeospatialStatistics::Merge(const GeospatialStatistics& other) { + impl_->Merge(*other.impl_); +} + +void GeospatialStatistics::Update(const ByteArray* values, int64_t num_values, + int64_t null_count) { + impl_->Update(values, num_values, null_count); +} + +void GeospatialStatistics::UpdateSpaced(const ByteArray* values, + const uint8_t* valid_bits, + int64_t valid_bits_offset, + int64_t num_spaced_values, int64_t num_values, + int64_t null_count) { + impl_->UpdateSpaced(values, valid_bits, valid_bits_offset, 
num_spaced_values, + num_values, null_count); +} + +void GeospatialStatistics::Update(const ::arrow::Array& values) { impl_->Update(values); } + +void GeospatialStatistics::Reset() { impl_->Reset(); } + +bool GeospatialStatistics::is_valid() const { return impl_->is_valid(); } + +EncodedGeospatialStatistics GeospatialStatistics::Encode() const { + return impl_->Encode(); +} + +void GeospatialStatistics::Decode(const EncodedGeospatialStatistics& encoded) { + impl_->Update(encoded); +} + +std::shared_ptr GeospatialStatistics::clone() const { + std::unique_ptr impl = + std::make_unique(*impl_); + return std::make_shared(std::move(impl)); +} + +double GeospatialStatistics::GetXMin() const { return impl_->GetMinBounds()[0]; } + +double GeospatialStatistics::GetXMax() const { return impl_->GetMaxBounds()[0]; } + +double GeospatialStatistics::GetYMin() const { return impl_->GetMinBounds()[1]; } + +double GeospatialStatistics::GetYMax() const { return impl_->GetMaxBounds()[1]; } + +double GeospatialStatistics::GetZMin() const { return impl_->GetMinBounds()[2]; } + +double GeospatialStatistics::GetZMax() const { return impl_->GetMaxBounds()[2]; } + +double GeospatialStatistics::GetMMin() const { return impl_->GetMinBounds()[3]; } + +double GeospatialStatistics::GetMMax() const { return impl_->GetMaxBounds()[3]; } + +bool GeospatialStatistics::HasZ() const { return (GetZMax() - GetZMin()) > 0; } + +bool GeospatialStatistics::HasM() const { return (GetMMax() - GetMMin()) > 0; } + +std::vector GeospatialStatistics::GetGeometryTypes() const { + return impl_->GetGeometryTypes(); +} + +} // namespace parquet diff --git a/cpp/src/parquet/geometry_statistics.h b/cpp/src/parquet/geometry_statistics.h new file mode 100644 index 0000000000000..16e73ae26c9f1 --- /dev/null +++ b/cpp/src/parquet/geometry_statistics.h @@ -0,0 +1,164 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include + +#include "parquet/platform.h" +#include "parquet/types.h" + +namespace parquet { + +/// \brief Structure represented encoded statistics to be written to and read from Parquet +/// serialized metadata. +/// +/// See the Parquet Thrift definition and GeospatialStatistics for the specific definition +/// of field values. 
+class PARQUET_EXPORT EncodedGeospatialStatistics { + public: + static constexpr double kInf = std::numeric_limits::infinity(); + + EncodedGeospatialStatistics() = default; + EncodedGeospatialStatistics(const EncodedGeospatialStatistics&) = default; + EncodedGeospatialStatistics(EncodedGeospatialStatistics&&) = default; + EncodedGeospatialStatistics& operator=(const EncodedGeospatialStatistics&) = default; + + double xmin{kInf}; + double xmax{-kInf}; + double ymin{kInf}; + double ymax{-kInf}; + double zmin{kInf}; + double zmax{-kInf}; + double mmin{kInf}; + double mmax{-kInf}; + std::vector geospatial_types; + + bool has_z() const { return (zmax - zmin) != -kInf; } + + bool has_m() const { return (mmax - mmin) != -kInf; } + + bool is_set() const { return !geospatial_types.empty(); } +}; + +class GeospatialStatisticsImpl; + +/// \brief Base type for computing geospatial column statistics while writing a file +class PARQUET_EXPORT GeospatialStatistics { + public: + GeospatialStatistics(); + explicit GeospatialStatistics(std::unique_ptr impl); + explicit GeospatialStatistics(const EncodedGeospatialStatistics& encoded); + GeospatialStatistics(GeospatialStatistics&&); + + ~GeospatialStatistics(); + + /// \brief Return true if bounds, geometry types, and validity are identical + bool Equals(const GeospatialStatistics& other) const; + + /// \brief Update these statistics based on previously calculated or decoded statistics + void Merge(const GeospatialStatistics& other); + + /// \brief Update these statistics based on values + void Update(const ByteArray* values, int64_t num_values, int64_t null_count); + + /// \brief Update these statistics based on the non-null elements of values + void UpdateSpaced(const ByteArray* values, const uint8_t* valid_bits, + int64_t valid_bits_offset, int64_t num_spaced_values, + int64_t num_values, int64_t null_count); + + /// \brief Update these statistics based on the non-null elements of values + /// + /// Currently, BinaryArray and 
LargeBinaryArray input is supported. + void Update(const ::arrow::Array& values); + + /// \brief Return these statistics to an empty state + void Reset(); + + /// \brief Encode the statistics for serializing to Thrift + /// + /// If invalid WKB was encountered, empty encoded statistics are returned + /// (such that is_set() returns false and they should not be written). + EncodedGeospatialStatistics Encode() const; + + /// \brief Returns true if all WKB encountered was valid or false otherwise + bool is_valid() const; + + std::shared_ptr clone() const; + + /// \brief Update these statistics with previously generated statistics + void Decode(const EncodedGeospatialStatistics& encoded); + + /// \brief The minimum encountered value in the X dimension, or Inf if no X values were + /// encountered. + /// + /// The Parquet definition allows for "wrap around" bounds where xmin > xmax. In this + /// case, these bounds represent the union of the intervals [xmax, Inf] and [-Inf, + /// xmin]. This implementation does not yet generate these types of bounds but they may + /// be encountered in files written by other readers. + double GetXMin() const; + + /// \brief The maximum encountered value in the X dimension, or -Inf if no X values were + /// encountered, subject to "wrap around" bounds (see GetXMin()). + double GetXMax() const; + + /// \brief The minimum encountered value in the Y dimension, or Inf if no Y values were + /// encountered. + /// + /// The Parquet definition allows for "wrap around" bounds where ymin > ymax. In this + /// case, these bounds represent the union of the intervals [ymax, Inf] and [-Inf, + /// ymin]. This implementation does not yet generate these types of bounds but they may + /// be encountered in files written by other readers. + double GetYMin() const; + + /// \brief The maximum encountered value in the Y dimension, or -Inf if no Y values were + /// encountered, subject to "wrap around" bounds (see GetXMin()). 
+ double GetYMax() const; + + /// \brief The minimum encountered value in the Z dimension, or Inf if no Z values were + /// encountered. Wrap around bounds are not permitted in the Z dimension. + double GetZMin() const; + + /// \brief The maximum encountered value in the Z dimension, or -Inf if no Z values were + /// encountered. Wrap around bounds are not permitted in the Z dimension. + double GetZMax() const; + + /// \brief The minimum encountered value in the M dimension, or Inf if no M values were + /// encountered. Wrap around bounds are not permitted in the M dimension. + double GetMMin() const; + + /// \brief The maximum encountered value in the M dimension, or -Inf if no M values were + /// encountered. Wrap around bounds are not permitted in the M dimension. + double GetMMax() const; + + /// \brief Returns true if any Z values were encountered or false otherwise + bool HasZ() const; + + /// \brief Returns true if any M values were encountered or false otherwise + bool HasM() const; + + /// \brief Return the geometry type codes from the well-known binary encountered + /// + /// This implementation always returns sorted output with no duplicates. + std::vector GetGeometryTypes() const; + + private: + std::unique_ptr impl_; +}; + +} // namespace parquet diff --git a/cpp/src/parquet/geometry_statistics_test.cc b/cpp/src/parquet/geometry_statistics_test.cc new file mode 100644 index 0000000000000..b6af487b316e5 --- /dev/null +++ b/cpp/src/parquet/geometry_statistics_test.cc @@ -0,0 +1,192 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include "arrow/array/builder_binary.h" +#include "arrow/compute/api.h" +#include "arrow/testing/gtest_util.h" + +#include "geometry_statistics.h" +#include "parquet/test_util.h" + +namespace parquet::geometry { + +TEST(TestGeospatialStatistics, TestDefaults) { + GeospatialStatistics stats; + EXPECT_EQ(stats.GetGeometryTypes().size(), 0); + EXPECT_TRUE(stats.is_valid()); + EXPECT_FALSE(stats.HasZ()); + EXPECT_FALSE(stats.HasM()); + EXPECT_EQ(stats.GetXMax() - stats.GetXMin(), -kInf); + EXPECT_EQ(stats.GetYMax() - stats.GetYMin(), -kInf); + EXPECT_EQ(stats.GetZMax() - stats.GetZMin(), -kInf); + EXPECT_EQ(stats.GetMMax() - stats.GetMMin(), -kInf); + EXPECT_TRUE(stats.Equals(GeospatialStatistics())); + + auto encoded = stats.Encode(); + EXPECT_FALSE(encoded.is_set()); + EXPECT_FALSE(encoded.has_z()); + EXPECT_FALSE(encoded.has_m()); + EXPECT_TRUE(GeospatialStatistics(encoded).Equals(stats)); + + stats.Merge(GeospatialStatistics()); + EXPECT_TRUE(GeospatialStatistics(encoded).Equals(stats)); +} + +TEST(TestGeospatialStatistics, TestUpdateByteArray) { + GeospatialStatistics stats; + + double xyzm0[] = {10, 11, 12, 13}; + std::string xyzm_wkb0 = test::MakeWKBPoint(xyzm0, true, true); + ByteArray item0{static_cast(xyzm_wkb0.size()), + reinterpret_cast(xyzm_wkb0.data())}; + + stats.Update(&item0, 1, 0); + EXPECT_EQ(stats.GetXMin(), 10); + EXPECT_EQ(stats.GetXMax(), 10); + EXPECT_EQ(stats.GetYMin(), 11); + EXPECT_EQ(stats.GetYMax(), 11); + EXPECT_EQ(stats.GetZMin(), 12); + EXPECT_EQ(stats.GetZMax(), 12); + EXPECT_EQ(stats.GetMMin(), 
13); + EXPECT_EQ(stats.GetMMax(), 13); + EXPECT_THAT(stats.GetGeometryTypes(), ::testing::ElementsAre(3001)); + + double xyzm1[] = {20, 21, 22, 23}; + std::string xyzm_wkb1 = test::MakeWKBPoint(xyzm1, true, true); + ByteArray item1{static_cast(xyzm_wkb1.size()), + reinterpret_cast(xyzm_wkb1.data())}; + + stats.Update(&item1, 1, 0); + EXPECT_EQ(stats.GetXMin(), 10); + EXPECT_EQ(stats.GetXMax(), 20); + EXPECT_EQ(stats.GetYMin(), 11); + EXPECT_EQ(stats.GetYMax(), 21); + EXPECT_EQ(stats.GetZMin(), 12); + EXPECT_EQ(stats.GetZMax(), 22); + EXPECT_EQ(stats.GetMMin(), 13); + EXPECT_EQ(stats.GetMMax(), 23); + EXPECT_THAT(stats.GetGeometryTypes(), ::testing::ElementsAre(3001)); + + // Check recreating the statistics with actual values + auto encoded = stats.Encode(); + EXPECT_TRUE(GeospatialStatistics(encoded).Equals(stats)); + + // Check resetting to the original state + stats.Reset(); + EXPECT_TRUE(stats.Equals(GeospatialStatistics())); + + // Check UpdateSpaced() + + // A null value that should be skipped + double xyzm2[] = {-30, -31, -32, -33}; + std::string xyzm_wkb2 = test::MakeWKBPoint(xyzm2, true, true); + ByteArray item2{static_cast(xyzm_wkb2.size()), + reinterpret_cast(xyzm_wkb2.data())}; + + // A non-null value that shouldn't be skipped + double xyzm3[] = {30, 31, 32, 33}; + std::string xyzm_wkb3 = test::MakeWKBPoint(xyzm3, true, true); + ByteArray item3{static_cast(xyzm_wkb3.size()), + reinterpret_cast(xyzm_wkb3.data())}; + + ByteArray items[] = {item0, item1, item2, item3}; + // Validity bitmap with an extra bit on the front to check non-zero bits offset + uint8_t validity = 0b00010111; + GeospatialStatistics stats_spaced; + stats_spaced.UpdateSpaced(items, &validity, 1, 4, 4, 1); + + EXPECT_EQ(stats_spaced.GetXMin(), 10); + EXPECT_EQ(stats_spaced.GetXMax(), 30); + EXPECT_EQ(stats_spaced.GetYMin(), 11); + EXPECT_EQ(stats_spaced.GetYMax(), 31); + EXPECT_EQ(stats_spaced.GetZMin(), 12); + EXPECT_EQ(stats_spaced.GetZMax(), 32); + EXPECT_EQ(stats_spaced.GetMMin(), 
13); + EXPECT_EQ(stats_spaced.GetMMax(), 33); + EXPECT_THAT(stats_spaced.GetGeometryTypes(), ::testing::ElementsAre(3001)); + + // Check merge + stats.Merge(stats_spaced); + EXPECT_TRUE(stats.Equals(stats_spaced)); + + // Check ingest of invalid WKB + ByteArray invalid; + stats.Update(&invalid, 1, 0); + EXPECT_FALSE(stats.is_valid()); + EXPECT_FALSE(stats.Encode().is_set()); + + // This should be true even after ingesting more values + stats.Update(&item0, 1, 0); + EXPECT_FALSE(stats.is_valid()); + EXPECT_FALSE(stats.Encode().is_set()); + + // And should cause other statistics to become invalid when merged with them + stats_spaced.Merge(stats); + EXPECT_FALSE(stats_spaced.is_valid()); + EXPECT_FALSE(stats_spaced.Encode().is_set()); +} + +TEST(TestGeospatialStatistics, TestUpdateArray) { + // Build WKB array with a null from POINT (0 1)...POINT (14, 15) + ::arrow::BinaryBuilder builder; + std::array item; + for (int k = 0; k < 10; k++) { + test::GenerateWKBPoint(reinterpret_cast(item.data()), k, k + 1); + ASSERT_OK(builder.AppendValues({std::string(item.data(), item.size())})); + } + + ASSERT_OK(builder.AppendNull()); + + for (int k = 10; k < 15; k++) { + test::GenerateWKBPoint(reinterpret_cast(item.data()), k, k + 1); + ASSERT_OK(builder.AppendValues({std::string(item.data(), item.size())})); + } + + // Ensure we have both a binary array and a large binary array to work with + ASSERT_OK_AND_ASSIGN(const auto binary_array, builder.Finish()); + ASSERT_OK_AND_ASSIGN(const auto large_binary_array, + ::arrow::compute::Cast(binary_array, ::arrow::large_binary())); + + GeospatialStatistics stats; + stats.Update(*binary_array); + EXPECT_EQ(stats.GetXMin(), 0); + EXPECT_EQ(stats.GetYMin(), 1); + EXPECT_EQ(stats.GetXMax(), 14); + EXPECT_EQ(stats.GetYMax(), 15); + + GeospatialStatistics stats_large; + stats_large.Update(*large_binary_array.make_array()); + + EXPECT_TRUE(stats_large.Equals(stats)); +} + +TEST(TestGeospatialStatistics, TestUpdateArrayInvalid) { + // Build WKB 
array with invalid WKB (here, an empty string) + ::arrow::BinaryBuilder builder; + ASSERT_OK(builder.AppendValues({std::string()})); + ASSERT_OK_AND_ASSIGN(const auto invalid_wkb, builder.Finish()); + + GeospatialStatistics stats; + stats.Update(*invalid_wkb); + EXPECT_FALSE(stats.is_valid()); + EXPECT_FALSE(stats.Encode().is_set()); +} + +} // namespace parquet::geometry diff --git a/cpp/src/parquet/geometry_util_internal.cc b/cpp/src/parquet/geometry_util_internal.cc new file mode 100644 index 0000000000000..16e42e8adf73f --- /dev/null +++ b/cpp/src/parquet/geometry_util_internal.cc @@ -0,0 +1,234 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "parquet/geometry_util_internal.h" + +#include "arrow/result.h" +#include "arrow/util/endian.h" +#include "arrow/util/macros.h" +#include "arrow/util/ubsan.h" + +namespace parquet::geometry { + +/// \brief Object to keep track of the low-level consumption of a well-known binary +/// geometry +/// +/// Briefly, ISO well-known binary supported by the Parquet spec is an endian byte +/// (0x01 or 0x00), followed by geometry type + dimensions encoded as a (uint32_t), +/// followed by geometry-specific data. 
Coordinate sequences are represented by a +/// uint32_t (the number of coordinates) plus a sequence of doubles (number of coordinates +/// multiplied by the number of dimensions). +class WKBBuffer { + public: + WKBBuffer() : data_(NULLPTR), size_(0) {} + WKBBuffer(const uint8_t* data, int64_t size) : data_(data), size_(size) {} + + void Init(const uint8_t* data, int64_t size) { + data_ = data; + size_ = size; + } + + ::arrow::Result ReadUInt8() { return ReadChecked(); } + + ::arrow::Result ReadUInt32(bool swap) { + ARROW_ASSIGN_OR_RAISE(auto value, ReadChecked()); + if (ARROW_PREDICT_FALSE(swap)) { + return ByteSwap(value); + } else { + return value; + } + } + + template + ::arrow::Status ReadDoubles(uint32_t n_coords, bool swap, Visit&& visit) { + size_t total_bytes = n_coords * sizeof(Coord); + if (size_ < total_bytes) { + return ::arrow::Status::SerializationError( + "Can't coordinate sequence of ", total_bytes, " bytes from WKBBuffer with ", + size_, " remaining"); + } + + if (ARROW_PREDICT_FALSE(swap)) { + Coord coord; + for (uint32_t i = 0; i < n_coords; i++) { + coord = ReadUnchecked(); + for (uint32_t j = 0; j < coord.size(); j++) { + coord[j] = ByteSwap(coord[j]); + } + + visit(coord); + } + } else { + for (uint32_t i = 0; i < n_coords; i++) { + visit(ReadUnchecked()); + } + } + + return ::arrow::Status::OK(); + } + + size_t size() { return size_; } + + private: + const uint8_t* data_; + size_t size_; + + template + ::arrow::Result ReadChecked() { + if (size_ < sizeof(T)) { + return ::arrow::Status::SerializationError( + "Can't read ", sizeof(T), " bytes from WKBBuffer with ", size_, " remaining"); + } + + return ReadUnchecked(); + } + + template + T ReadUnchecked() { + T out = ::arrow::util::SafeLoadAs(data_); + data_ += sizeof(T); + size_ -= sizeof(T); + return out; + } + + template + T ByteSwap(T value) { + return ::arrow::bit_util::ByteSwap(value); + } +}; + +using GeometryTypeAndDimensions = std::pair; + +namespace { + +::arrow::Result 
ParseGeometryType(uint32_t wkb_geometry_type) { + // The number 1000 can be used because WKB geometry types are constructed + // on purpose such that this relationship is true (e.g., LINESTRING ZM maps + // to 3002). + uint32_t geometry_type_component = wkb_geometry_type % 1000; + uint32_t dimensions_component = wkb_geometry_type / 1000; + + auto min_geometry_type_value = static_cast(GeometryType::MIN); + auto max_geometry_type_value = static_cast(GeometryType::MAX); + auto min_dimension_value = static_cast(Dimensions::MIN); + auto max_dimension_value = static_cast(Dimensions::MAX); + + if (geometry_type_component < min_geometry_type_value || + geometry_type_component > max_geometry_type_value || + dimensions_component < min_dimension_value || + dimensions_component > max_dimension_value) { + return ::arrow::Status::SerializationError("Invalid WKB geometry type: ", + wkb_geometry_type); + } + + GeometryTypeAndDimensions out{static_cast(geometry_type_component), + static_cast(dimensions_component)}; + return out; +} + +} // namespace + +::arrow::Status WKBGeometryBounder::ReadGeometry(const uint8_t* data, int64_t size) { + WKBBuffer src{data, size}; + ARROW_RETURN_NOT_OK(ReadGeometryInternal(&src, /*record_wkb_type=*/true)); + if (src.size() != 0) { + return ::arrow::Status::SerializationError( + "Exepcted zero bytes after consuming WKB but got ", src.size()); + } + + return ::arrow::Status::OK(); +} + +::arrow::Status WKBGeometryBounder::ReadGeometryInternal(WKBBuffer* src, + bool record_wkb_type) { + ARROW_ASSIGN_OR_RAISE(uint8_t endian, src->ReadUInt8()); +#if defined(ARROW_LITTLE_ENDIAN) + bool swap = endian != 0x01; +#else + bool swap = endian != 0x00; +#endif + + ARROW_ASSIGN_OR_RAISE(uint32_t wkb_geometry_type, src->ReadUInt32(swap)); + ARROW_ASSIGN_OR_RAISE(auto geometry_type_and_dimensions, + ParseGeometryType(wkb_geometry_type)); + + // Keep track of geometry types encountered if at the top level + if (record_wkb_type) { + 
geospatial_types_.insert(static_cast(wkb_geometry_type)); + } + + switch (geometry_type_and_dimensions.first) { + case GeometryType::POINT: + ARROW_RETURN_NOT_OK( + ReadSequence(src, geometry_type_and_dimensions.second, 1, swap)); + break; + + case GeometryType::LINESTRING: { + ARROW_ASSIGN_OR_RAISE(uint32_t n_coords, src->ReadUInt32(swap)); + ARROW_RETURN_NOT_OK( + ReadSequence(src, geometry_type_and_dimensions.second, n_coords, swap)); + break; + } + case GeometryType::POLYGON: { + ARROW_ASSIGN_OR_RAISE(uint32_t n_parts, src->ReadUInt32(swap)); + for (uint32_t i = 0; i < n_parts; i++) { + ARROW_ASSIGN_OR_RAISE(uint32_t n_coords, src->ReadUInt32(swap)); + ARROW_RETURN_NOT_OK( + ReadSequence(src, geometry_type_and_dimensions.second, n_coords, swap)); + } + break; + } + + // These are all encoded the same in WKB, even though this encoding would + // allow for parts to be of a different geometry type or different dimensions. + // For the purposes of bounding, this does not cause us problems. 
+ case GeometryType::MULTIPOINT: + case GeometryType::MULTILINESTRING: + case GeometryType::MULTIPOLYGON: + case GeometryType::GEOMETRYCOLLECTION: { + ARROW_ASSIGN_OR_RAISE(uint32_t n_parts, src->ReadUInt32(swap)); + for (uint32_t i = 0; i < n_parts; i++) { + ARROW_RETURN_NOT_OK(ReadGeometryInternal(src, /*record_wkb_type*/ false)); + } + break; + } + } + + return ::arrow::Status::OK(); +} + +::arrow::Status WKBGeometryBounder::ReadSequence(WKBBuffer* src, Dimensions dimensions, + uint32_t n_coords, bool swap) { + switch (dimensions) { + case Dimensions::XY: + return src->ReadDoubles( + n_coords, swap, [&](BoundingBox::XY coord) { box_.UpdateXY(coord); }); + case Dimensions::XYZ: + return src->ReadDoubles( + n_coords, swap, [&](BoundingBox::XYZ coord) { box_.UpdateXYZ(coord); }); + case Dimensions::XYM: + return src->ReadDoubles( + n_coords, swap, [&](BoundingBox::XYM coord) { box_.UpdateXYM(coord); }); + case Dimensions::XYZM: + return src->ReadDoubles( + n_coords, swap, [&](BoundingBox::XYZM coord) { box_.UpdateXYZM(coord); }); + default: + return ::arrow::Status::Invalid("Unknown dimensions"); + } +} + +} // namespace parquet::geometry diff --git a/cpp/src/parquet/geometry_util_internal.h b/cpp/src/parquet/geometry_util_internal.h new file mode 100644 index 0000000000000..889c118c0fb05 --- /dev/null +++ b/cpp/src/parquet/geometry_util_internal.h @@ -0,0 +1,184 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include +#include + +#include "parquet/platform.h" + +namespace parquet::geometry { + +/// \brief Infinity, used to define bounds of empty bounding boxes +constexpr double kInf = std::numeric_limits::infinity(); + +/// \brief Valid combinations of dimensions allowed by ISO well-known binary +enum class Dimensions { XY = 0, XYZ = 1, XYM = 2, XYZM = 3, MIN = 0, MAX = 3 }; + +/// \brief The supported set of geometry types allowed by ISO well-known binary +enum class GeometryType { + POINT = 1, + LINESTRING = 2, + POLYGON = 3, + MULTIPOINT = 4, + MULTILINESTRING = 5, + MULTIPOLYGON = 6, + GEOMETRYCOLLECTION = 7, + MIN = 1, + MAX = 7 +}; + +/// \brief A collection of intervals representing the encountered ranges of values +/// in each dimension. 
+struct BoundingBox {
+  // Coordinate tuple types, one per dimension combination. An XYM tuple keeps M
+  // in its third element (see UpdateXYM).
+  using XY = std::array;
+  using XYZ = std::array;
+  using XYM = std::array;
+  using XYZM = std::array;
+
+  /// \brief Construct bounds from explicit per-dimension minimums and maximums
+  BoundingBox(const XYZM& mins, const XYZM& maxes) : min(mins), max(maxes) {}
+
+  /// \brief Construct empty bounds: every minimum is +infinity and every maximum
+  /// is -infinity
+  BoundingBox() : min{kInf, kInf, kInf, kInf}, max{-kInf, -kInf, -kInf, -kInf} {}
+
+  BoundingBox(const BoundingBox& other) = default;
+  BoundingBox& operator=(const BoundingBox&) = default;
+
+  /// \brief Update the X and Y bounds to ensure these bounds contain coord
+  void UpdateXY(const XY& coord) { UpdateInternal(coord); }
+
+  /// \brief Update the X, Y, and Z bounds to ensure these bounds contain coord
+  void UpdateXYZ(const XYZ& coord) { UpdateInternal(coord); }
+
+  /// \brief Update the X, Y, and M bounds to ensure these bounds contain coord
+  ///
+  /// M is stored in slot 3 of min/max, so coord[2] is applied to index 3 and the
+  /// Z slot (index 2) is left untouched.
+  void UpdateXYM(const XYM& coord) {
+    min[0] = std::min(min[0], coord[0]);
+    min[1] = std::min(min[1], coord[1]);
+    min[3] = std::min(min[3], coord[2]);
+    max[0] = std::max(max[0], coord[0]);
+    max[1] = std::max(max[1], coord[1]);
+    max[3] = std::max(max[3], coord[2]);
+  }
+
+  /// \brief Update the X, Y, Z, and M bounds to ensure these bounds contain coord
+  void UpdateXYZM(const XYZM& coord) { UpdateInternal(coord); }
+
+  /// \brief Reset these bounds to an empty state such that they contain no coordinates
+  void Reset() {
+    for (int i = 0; i < 4; i++) {
+      min[i] = kInf;
+      max[i] = -kInf;
+    }
+  }
+
+  /// \brief Update these bounds such that they also contain other
+  void Merge(const BoundingBox& other) {
+    for (int i = 0; i < 4; i++) {
+      min[i] = std::min(min[i], other.min[i]);
+      max[i] = std::max(max[i], other.max[i]);
+    }
+  }
+
+  /// \brief Format as "BoundingBox [min => max, ...]" with one "min => max" pair
+  /// per slot, in storage order
+  std::string ToString() const {
+    std::stringstream ss;
+    ss << "BoundingBox [" << min[0] << " => " << max[0];
+    for (int i = 1; i < 4; i++) {
+      ss << ", " << min[i] << " => " << max[i];
+    }
+
+    ss << "]";
+
+    return ss.str();
+  }
+
+  // Per-dimension bounds; slots are X = 0, Y = 1, Z = 2, M = 3
+  XYZM min;
+  XYZM max;
+
+ private:
+  // This works for XY, XYZ, and XYZM, whose elements line up with the leading
+  // slots of min/max. XYM does not (its third element belongs in slot 3), which
+  // is why UpdateXYM is written out by hand.
+  template
+  void UpdateInternal(Coord coord) {
+    static_assert(coord.size() <= 4);
+
+    for (size_t i = 0; i < coord.size(); i++) {
+      min[i] = std::min(min[i], coord[i]);
+      max[i] = std::max(max[i], coord[i]);
+    }
+  }
+};
+
+/// \brief Exact equality: all four minimums and all four maximums must match
+inline bool operator==(const BoundingBox& lhs, const BoundingBox& rhs) {
+  return lhs.min == rhs.min && lhs.max == rhs.max;
+}
+
+class WKBBuffer;
+
+/// \brief Accumulate a BoundingBox and geometry types based on zero or more well-known
+/// binary blobs
+class PARQUET_EXPORT WKBGeometryBounder {
+ public:
+  WKBGeometryBounder() = default;
+  WKBGeometryBounder(const WKBGeometryBounder&) = default;
+
+  /// \brief Accumulate the bounds of a serialized well-known binary geometry
+  ///
+  /// Returns SerializationError for any parse errors encountered. Bounds for
+  /// any encountered coordinates are accumulated and the geometry type of
+  /// the geometry is added to the internal geometry type list.
+  ///
+  /// Note that this method is NOT appropriate for bounding a GEOGRAPHY,
+  /// whose bounds are not a function purely of the vertices. Geography bounding
+  /// is not yet implemented.
+  ::arrow::Status ReadGeometry(const uint8_t* data, int64_t size);
+
+  /// \brief Accumulate the bounds of a previously-calculated BoundingBox
+  void ReadBox(const BoundingBox& box) { box_.Merge(box); }
+
+  /// \brief Accumulate a previously-calculated list of geometry types
+  void ReadGeometryTypes(const std::vector& geospatial_types) {
+    geospatial_types_.insert(geospatial_types.begin(), geospatial_types.end());
+  }
+
+  /// \brief Retrieve the accumulated bounds
+  const BoundingBox& Bounds() const { return box_; }
+
+  /// \brief Retrieve the accumulated geometry types
+  ///
+  /// The result is a sorted copy; each type appears once because the types are
+  /// accumulated in a set.
+  std::vector GeometryTypes() const {
+    std::vector out(geospatial_types_.begin(), geospatial_types_.end());
+    std::sort(out.begin(), out.end());
+    return out;
+  }
+
+  /// \brief Reset the internal bounds and geometry types list to an empty state
+  void Reset() {
+    box_.Reset();
+    geospatial_types_.clear();
+  }
+
+ private:
+  // Bounds accumulated so far by ReadGeometry() and ReadBox()
+  BoundingBox box_;
+  // Distinct geometry type codes accumulated so far
+  std::unordered_set geospatial_types_;
+
+  ::arrow::Status ReadGeometryInternal(WKBBuffer* src, bool record_wkb_type);
+
+  ::arrow::Status ReadSequence(WKBBuffer* src, Dimensions dimensions, uint32_t n_coords,
+                               bool swap);
+};
+
+} // namespace parquet::geometry
diff --git a/cpp/src/parquet/geometry_util_internal_test.cc b/cpp/src/parquet/geometry_util_internal_test.cc
new file mode 100644
index 0000000000000..a2809058232f7
--- /dev/null
+++ b/cpp/src/parquet/geometry_util_internal_test.cc
@@ -0,0 +1,455 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include
+#include
+#include
+#include
+
+#include "arrow/testing/gtest_util.h"
+
+#include "parquet/geometry_util_internal.h"
+#include "parquet/test_util.h"
+
+namespace parquet::geometry {
+
+// Checks the empty state, its string form, slot-by-slot merging, and Reset()
+TEST(TestGeometryUtil, TestBoundingBox) {
+  BoundingBox box;
+  EXPECT_EQ(box, BoundingBox({kInf, kInf, kInf, kInf}, {-kInf, -kInf, -kInf, -kInf}));
+  EXPECT_EQ(box.ToString(),
+            "BoundingBox [inf => -inf, inf => -inf, inf => -inf, inf => -inf]");
+
+  // Each partial box is empty everywhere except the slots named in its variable
+  BoundingBox box_xyzm({-1, -2, -3, -4}, {1, 2, 3, 4});
+  BoundingBox box_xy({-10, -20, kInf, kInf}, {10, 20, -kInf, -kInf});
+  BoundingBox box_xyz({kInf, kInf, -30, kInf}, {-kInf, -kInf, 30, -kInf});
+  BoundingBox box_xym({kInf, kInf, kInf, -40}, {-kInf, -kInf, -kInf, 40});
+
+  box_xyzm.Merge(box_xy);
+  EXPECT_EQ(box_xyzm, BoundingBox({-10, -20, -3, -4}, {10, 20, 3, 4}));
+
+  box_xyzm.Merge(box_xyz);
+  EXPECT_EQ(box_xyzm, BoundingBox({-10, -20, -30, -4}, {10, 20, 30, 4}));
+
+  box_xyzm.Merge(box_xym);
+  EXPECT_EQ(box_xyzm, BoundingBox({-10, -20, -30, -40}, {10, 20, 30, 40}));
+
+  box_xyzm.Reset();
+  EXPECT_EQ(box_xyzm, BoundingBox());
+}
+
+/// \brief One parameterized case: a WKB blob and the bounds it is expected to produce
+///
+/// box_values lists every expected minimum followed by every expected maximum,
+/// one per dimension present (for example {xmin, ymin, xmax, ymax} for XY and
+/// {xmin, ymin, mmin, xmax, ymax, mmax} for XYM). If box_values is too short to
+/// describe a box for the given dimensions, the expected box is left empty.
+struct WKBTestCase {
+  WKBTestCase() = default;
+  WKBTestCase(GeometryType x, Dimensions y, const std::vector& z,
+              const std::vector& box_values = {})
+      : geometry_type(x), dimensions(y), wkb(z) {
+    std::array mins = {kInf, kInf, kInf, kInf};
+    std::array maxes{-kInf, -kInf, -kInf, -kInf};
+
+    if (dimensions == Dimensions::XYM) {
+      // XYM skips the Z slot: the third value of each triple is M (slot 3).
+      // box_values defaults to empty, so the fixed indexes must be guarded.
+      if (box_values.size() >= 6) {
+        mins = {box_values[0], box_values[1], kInf, box_values[2]};
+        maxes = {box_values[3], box_values[4], -kInf, box_values[5]};
+      }
+    } else {
+      // The first half holds the minimums and the second half the maximums.
+      // Copy no more values than mins/maxes have slots.
+      size_t half = box_values.size() / 2;
+      size_t n_slots = half < mins.size() ? half : mins.size();
+      for (size_t i = 0; i < n_slots; i++) {
+        mins[i] = box_values[i];
+        maxes[i] = box_values[half + i];
+      }
+    }
+
+    box = BoundingBox(mins, maxes);
+  }
+  WKBTestCase(const WKBTestCase& other) = default;
+
+  // Initialized so that a default-constructed case never holds indeterminate enums
+  GeometryType geometry_type = GeometryType::POINT;
+  Dimensions dimensions = Dimensions::XY;
+  std::vector wkb;
+  BoundingBox box;
+};
+
+// Prints the case as its ISO WKB type code (dimensions * 1000 + geometry type)
+std::ostream& operator<<(std::ostream& os, const WKBTestCase& obj) {
+  uint32_t iso_wkb_geometry_type =
+      static_cast(obj.dimensions) * 1000 + static_cast(obj.geometry_type);
+  os << "WKBTestCase<" << iso_wkb_geometry_type << ">";
+  return os;
+}
+
+// Lets test failure messages show a BoundingBox via ToString()
+std::ostream& operator<<(std::ostream& os, const BoundingBox& obj) {
+  os << obj.ToString();
+  return os;
+}
+
+class WKBTestFixture : public ::testing::TestWithParam {
+ protected:
+  WKBTestCase test_case;
+};
+
+// Bounds one geometry, checks the resulting box and recorded type code, then
+// checks that Reset() returns the bounder to its empty state
+TEST_P(WKBTestFixture, TestWKBBounderNonEmpty) {
+  auto item = GetParam();
+
+  WKBGeometryBounder bounder;
+  EXPECT_EQ(bounder.Bounds(), BoundingBox());
+
+  ASSERT_OK(bounder.ReadGeometry(item.wkb.data(), item.wkb.size()));
+
+  EXPECT_EQ(bounder.Bounds(), item.box);
+  uint32_t wkb_type =
+      static_cast(item.dimensions) * 1000 + static_cast(item.geometry_type);
+  EXPECT_THAT(bounder.GeometryTypes(), ::testing::ElementsAre(::testing::Eq(wkb_type)));
+
+  bounder.Reset();
+  EXPECT_EQ(bounder.Bounds(), BoundingBox());
+  EXPECT_TRUE(bounder.GeometryTypes().empty());
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    TestGeometryUtil, WKBTestFixture,
+    ::testing::Values(
+        // POINT (30 10)
+        WKBTestCase(GeometryType::POINT, Dimensions::XY,
+                    {0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                     0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40},
+                    {30, 10, 30, 10}),
+        // POINT Z (30 10 40)
+        WKBTestCase(GeometryType::POINT, Dimensions::XYZ,
+                    {0x01, 0xe9, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                     0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24,
+                     0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40},
+                    {30, 10, 40, 30, 10, 40}),
+        // POINT M
(30 10 300) + WKBTestCase(GeometryType::POINT, Dimensions::XYM, + {0x01, 0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {30, 10, 300, 30, 10, 300}), + // POINT ZM (30 10 40 300) + WKBTestCase(GeometryType::POINT, Dimensions::XYZM, + {0x01, 0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {30, 10, 40, 300, 30, 10, 40, 300}), + // LINESTRING (30 10, 10 30, 40 40) + WKBTestCase(GeometryType::LINESTRING, Dimensions::XY, + {0x01, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, + {10, 10, 40, 40}), + // LINESTRING Z (30 10 40, 10 30 40, 40 40 80) + WKBTestCase(GeometryType::LINESTRING, Dimensions::XYZ, + {0x01, 0xea, 0x03, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x54, 0x40}, + {10, 10, 40, 40, 40, 80}), + // LINESTRING M (30 10 300, 10 30 300, 40 40 1600) + WKBTestCase(GeometryType::LINESTRING, Dimensions::XYM, + {0x01, 0xd2, 0x07, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 
0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, + 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x99, 0x40}, + {10, 10, 300, 40, 40, 1600}), + // LINESTRING ZM (30 10 40 300, 10 30 40 300, 40 40 80 1600) + WKBTestCase(GeometryType::LINESTRING, Dimensions::XYZM, + {0x01, 0xba, 0x0b, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x99, 0x40}, + {10, 10, 40, 300, 40, 40, 80, 1600}), + // POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10)) + WKBTestCase(GeometryType::POLYGON, Dimensions::XY, + {0x01, 0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x24, 0x40}, + {10, 10, 40, 40}), + // POLYGON Z ((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40)) + WKBTestCase( + 
GeometryType::POLYGON, Dimensions::XYZ, + {0x01, 0xeb, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40}, + {10, 10, 30, 40, 40, 80}), + // POLYGON M ((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300)) + WKBTestCase( + GeometryType::POLYGON, Dimensions::XYM, + {0x01, 0xd3, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x89, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x69, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xc0, 0x72, 0x40}, + {10, 10, 200, 40, 40, 1600}), + // POLYGON ZM ((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 10 40 + // 300)) + WKBTestCase( + 
GeometryType::POLYGON, Dimensions::XYZM, + {0x01, 0xbb, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x89, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0x72, 0x40}, + {10, 10, 30, 200, 40, 40, 80, 1600}), + // MULTIPOINT ((30 10)) + WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XY, + {0x01, 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, + {30, 10, 30, 10}), + // MULTIPOINT Z ((30 10 40)) + WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XYZ, + {0x01, 0xec, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0xe9, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, + {30, 10, 40, 30, 10, 40}), + // MULTIPOINT M ((30 10 300)) + WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XYM, + {0x01, 0xd4, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {30, 10, 300, 30, 10, 300}), + // MULTIPOINT ZM ((30 10 40 300)) + WKBTestCase(GeometryType::MULTIPOINT, Dimensions::XYZM, + {0x01, 0xbc, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {30, 10, 40, 300, 30, 10, 40, 300}), + // MULTILINESTRING ((30 10, 10 30, 40 40)) + WKBTestCase(GeometryType::MULTILINESTRING, Dimensions::XY, + {0x01, 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x02, + 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, + {10, 10, 40, 40}), + // MULTILINESTRING Z ((30 10 40, 10 30 40, 40 40 80)) + WKBTestCase( + GeometryType::MULTILINESTRING, Dimensions::XYZ, + {0x01, 0xed, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xea, 0x03, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, 0x40}, + {10, 10, 40, 40, 40, 80}), + // MULTILINESTRING M ((30 10 300, 10 30 300, 40 40 1600)) + WKBTestCase( + GeometryType::MULTILINESTRING, Dimensions::XYM, + {0x01, 0xd5, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 
0x00, 0x01, 0xd2, 0x07, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40}, + {10, 10, 300, 40, 40, 1600}), + // MULTILINESTRING ZM ((30 10 40 300, 10 30 40 300, 40 40 80 1600)) + WKBTestCase( + GeometryType::MULTILINESTRING, Dimensions::XYZM, + {0x01, 0xbd, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xba, 0x0b, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40}, + {10, 10, 40, 300, 40, 40, 80, 1600}), + // MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10))) + WKBTestCase( + GeometryType::MULTIPOLYGON, Dimensions::XY, + {0x01, 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x03, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x24, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, + {10, 10, 40, 40}), + // MULTIPOLYGON Z (((30 10 40, 40 40 80, 20 40 60, 10 20 30, 30 10 40))) + WKBTestCase( + GeometryType::MULTIPOLYGON, Dimensions::XYZ, + {0x01, 0xee, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xeb, 0x03, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x54, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x4e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40}, + {10, 10, 30, 40, 40, 80}), + // MULTIPOLYGON M (((30 10 300, 40 40 1600, 20 40 800, 10 20 200, 30 10 300))) + WKBTestCase( + GeometryType::MULTIPOLYGON, Dimensions::XYM, + {0x01, 0xd6, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xd3, 0x07, 0x00, + 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x99, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x89, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 
0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {10, 10, 200, 40, 40, 1600}), + // MULTIPOLYGON ZM (((30 10 40 300, 40 40 80 1600, 20 40 60 800, 10 20 30 200, 30 + // 10 40 300))) + WKBTestCase(GeometryType::MULTIPOLYGON, Dimensions::XYZM, + {0x01, 0xbe, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0xbb, + 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x54, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x99, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x44, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x40, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x89, 0x40, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x24, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x34, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3e, 0x40, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x69, 0x40, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40}, + {10, 10, 30, 200, 40, 40, 80, 1600}), + // GEOMETRYCOLLECTION (POINT (30 10)) + WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XY, + {0x01, 0x07, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40}, + {30, 10, 30, 10}), + // GEOMETRYCOLLECTION Z (POINT Z (30 10 40)) + 
WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XYZ,
+                    {0x01, 0xef, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
+                     0xe9, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                     0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40,
+                     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40},
+                    {30, 10, 40, 30, 10, 40}),
+        // GEOMETRYCOLLECTION M (POINT M (30 10 300))
+        WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XYM,
+                    {0x01, 0xd7, 0x07, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
+                     0xd1, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                     0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40,
+                     0x00, 0x00, 0x00, 0x00, 0x00, 0xc0, 0x72, 0x40},
+                    {30, 10, 300, 30, 10, 300}),
+        // GEOMETRYCOLLECTION ZM (POINT ZM (30 10 40 300))
+        WKBTestCase(GeometryType::GEOMETRYCOLLECTION, Dimensions::XYZM,
+                    {0x01, 0xbf, 0x0b, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
+                     0xb9, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+                     0x3e, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x24, 0x40,
+                     0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x44, 0x40, 0x00, 0x00,
+                     0x00, 0x00, 0x00, 0xc0, 0x72, 0x40},
+                    {30, 10, 40, 300, 30, 10, 40, 300})));
+
+/// \brief Parameters for one MakeWKBPoint round trip
+///
+/// xyzm always has four slots; has_z and has_m say which of the last two the
+/// generated point is expected to carry.
+struct MakeWKBPointTestCase {
+  MakeWKBPointTestCase() = default;
+  MakeWKBPointTestCase(const std::vector& xyzm, bool has_z, bool has_m)
+      : has_z(has_z), has_m(has_m) {
+    // Copy at most four values. A shorter vector leaves the remaining slots at
+    // zero instead of reading past the end of its storage.
+    for (size_t i = 0; i < 4 && i < xyzm.size(); i++) {
+      this->xyzm[i] = xyzm[i];
+    }
+  }
+
+  // Initialized so that a default-constructed case holds no indeterminate values
+  double xyzm[4] = {0, 0, 0, 0};
+  bool has_z = false;
+  bool has_m = false;
+};
+
+class MakeWKBPointTestFixture : public testing::TestWithParam {};
+
+// Builds a point with test::MakeWKBPoint, bounds it, and checks that X and Y
+// always round-trip while Z and M stay at infinity when they were not requested
+TEST_P(MakeWKBPointTestFixture, MakeWKBPoint) {
+  auto param = GetParam();
+  std::string wkb = test::MakeWKBPoint(param.xyzm, param.has_z, param.has_m);
+  WKBGeometryBounder bounder;
+  ASSERT_OK(bounder.ReadGeometry(reinterpret_cast(wkb.data()), wkb.size()));
+  const BoundingBox::XYZM& mins = bounder.Bounds().min;
+  EXPECT_DOUBLE_EQ(param.xyzm[0], mins[0]);
+  EXPECT_DOUBLE_EQ(param.xyzm[1], mins[1]);
+  if (param.has_z) {
+    EXPECT_DOUBLE_EQ(param.xyzm[2], mins[2]);
+  } else {
+    EXPECT_TRUE(std::isinf(mins[2]));
+  }
+  if (param.has_m) {
+    EXPECT_DOUBLE_EQ(param.xyzm[3], mins[3]);
+  } else {
+    EXPECT_TRUE(std::isinf(mins[3]));
+  }
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    TestGeometryUtil, MakeWKBPointTestFixture,
+    ::testing::Values(MakeWKBPointTestCase({30, 10, 40, 300}, false, false),
+                      MakeWKBPointTestCase({30, 10, 40, 300}, true, false),
+                      MakeWKBPointTestCase({30, 10, 40, 300}, false, true),
+                      MakeWKBPointTestCase({30, 10, 40, 300}, true, true)));
+
+} // namespace parquet::geometry
diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 9b53da021f52e..8e987fac33be5 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -38,6 +38,7 @@ #include "parquet/schema.h" #include "parquet/schema_internal.h" #include "parquet/size_statistics.h" +#include "parquet/statistics.h" #include "parquet/thrift_internal.h" namespace parquet { @@ -111,6 +112,17 @@ static std::shared_ptr MakeTypedColumnStats( metadata.statistics.__isset.null_count, metadata.statistics.__isset.distinct_count); } +static std::shared_ptr MakeColumnGeometryStats( + const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) { + if (metadata.__isset.geospatial_statistics) { + EncodedGeospatialStatistics encoded_geometry_stats = + FromThrift(metadata.geospatial_statistics); + return std::make_shared(encoded_geometry_stats); + } else { + return nullptr; + } +} + std::shared_ptr MakeColumnStats(const format::ColumnMetaData& meta_data, const ColumnDescriptor* descr) { switch (static_cast(meta_data.type)) { @@ -271,6 +283,7 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { size_statistics_->Validate(descr_); } possible_stats_ = nullptr; + possible_geometry_stats_ = nullptr; InitKeyValueMetadata(); } @@ -309,6 +322,15 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { descr_->sort_order()); } + inline bool is_geometry_stats_set() const { + DCHECK(writer_version_ != nullptr); + if (possible_geometry_stats_ == nullptr && +
column_metadata_->__isset.geospatial_statistics) { + possible_geometry_stats_ = MakeColumnGeometryStats(*column_metadata_, descr_); + } + return possible_geometry_stats_ != nullptr && possible_geometry_stats_->is_valid(); + } + inline std::shared_ptr statistics() const { return is_stats_set() ? possible_stats_ : nullptr; } @@ -317,6 +339,10 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { return size_statistics_; } + inline std::shared_ptr geometry_statistics() const { + return is_geometry_stats_set() ? possible_geometry_stats_ : nullptr; + } + inline Compression::type compression() const { return LoadEnumSafe(&column_metadata_->codec); } @@ -396,6 +422,7 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl { } mutable std::shared_ptr possible_stats_; + mutable std::shared_ptr possible_geometry_stats_; std::vector encodings_; std::vector encoding_stats_; const format::ColumnChunk* column_; @@ -447,12 +474,20 @@ std::shared_ptr ColumnChunkMetaData::statistics() const { return impl_->statistics(); } +std::shared_ptr ColumnChunkMetaData::geometry_statistics() const { + return impl_->geometry_statistics(); +} + bool ColumnChunkMetaData::is_stats_set() const { return impl_->is_stats_set(); } std::shared_ptr ColumnChunkMetaData::size_statistics() const { return impl_->size_statistics(); } +bool ColumnChunkMetaData::is_geometry_stats_set() const { + return impl_->is_geometry_stats_set(); +} + std::optional ColumnChunkMetaData::bloom_filter_offset() const { return impl_->bloom_filter_offset(); } @@ -1563,6 +1598,10 @@ class ColumnChunkMetaDataBuilder::ColumnChunkMetaDataBuilderImpl { column_chunk_->meta_data.__set_size_statistics(ToThrift(size_stats)); } + void SetGeospatialStatistics(const EncodedGeospatialStatistics& val) { + column_chunk_->meta_data.__set_geospatial_statistics(ToThrift(val)); + } + void Finish(int64_t num_values, int64_t dictionary_page_offset, int64_t index_page_offset, int64_t data_page_offset, int64_t compressed_size, int64_t 
uncompressed_size, bool has_dictionary, @@ -1776,6 +1815,11 @@ void ColumnChunkMetaDataBuilder::SetSizeStatistics(const SizeStatistics& size_st impl_->SetSizeStatistics(size_stats); } +void ColumnChunkMetaDataBuilder::SetGeospatialStatistics( + const EncodedGeospatialStatistics& result) { + impl_->SetGeospatialStatistics(result); +} + void ColumnChunkMetaDataBuilder::SetKeyValueMetadata( std::shared_ptr key_value_metadata) { impl_->SetKeyValueMetadata(std::move(key_value_metadata)); diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h index 9a3964f7d6574..a482c778fef98 100644 --- a/cpp/src/parquet/metadata.h +++ b/cpp/src/parquet/metadata.h @@ -31,6 +31,20 @@ namespace parquet { +class ColumnDescriptor; +class EncodedStatistics; +class EncodedGeospatialStatistics; +class FileCryptoMetaData; +class Statistics; +class GeospatialStatistics; +class SchemaDescriptor; + +namespace schema { + +class ColumnPath; + +} // namespace schema + using KeyValueMetadata = ::arrow::KeyValueMetadata; class PARQUET_EXPORT ApplicationVersion { @@ -141,8 +155,10 @@ class PARQUET_EXPORT ColumnChunkMetaData { int64_t num_values() const; std::shared_ptr path_in_schema() const; bool is_stats_set() const; + bool is_geometry_stats_set() const; std::shared_ptr statistics() const; std::shared_ptr size_statistics() const; + std::shared_ptr geometry_statistics() const; Compression::type compression() const; // Indicate if the ColumnChunk compression is supported by the current @@ -440,6 +456,9 @@ class PARQUET_EXPORT ColumnChunkMetaDataBuilder { void SetStatistics(const EncodedStatistics& stats); void SetSizeStatistics(const SizeStatistics& size_stats); + // column geometry statistics + void SetGeospatialStatistics(const EncodedGeospatialStatistics& geometry_stats); + void SetKeyValueMetadata(std::shared_ptr key_value_metadata); // get the column descriptor diff --git a/cpp/src/parquet/parquet.thrift b/cpp/src/parquet/parquet.thrift index a1883d335aa23..9b89cca7f7c1a 100644 --- 
a/cpp/src/parquet/parquet.thrift +++ b/cpp/src/parquet/parquet.thrift @@ -61,14 +61,14 @@ enum ConvertedType { * values */ LIST = 3; - /** an enum is converted into a binary field */ + /** an enum is converted into a BYTE_ARRAY field */ ENUM = 4; /** * A decimal value. * - * This may be used to annotate binary or fixed primitive types. The - * underlying byte array stores the unscaled value encoded as two's + * This may be used to annotate BYTE_ARRAY or FIXED_LEN_BYTE_ARRAY primitive + * types. The underlying byte array stores the unscaled value encoded as two's * complement using big-endian byte order (the most significant byte is the * zeroth element). The value of the decimal is the value * 10^{-scale}. * @@ -159,7 +159,7 @@ enum ConvertedType { /** * An embedded BSON document * - * A BSON document embedded within a single BINARY column. + * A BSON document embedded within a single BYTE_ARRAY column. */ BSON = 20; @@ -182,10 +182,10 @@ enum ConvertedType { * Representation of Schemas */ enum FieldRepetitionType { - /** This field is required (can not be null) and each record has exactly 1 value. */ + /** This field is required (can not be null) and each row has exactly 1 value. */ REQUIRED = 0; - /** The field is optional (can be null) and each record has 0 or 1 values. */ + /** The field is optional (can be null) and each row has 0 or 1 values. */ OPTIONAL = 1; /** The field is repeated and can contain 0 or more values */ @@ -238,6 +238,29 @@ struct SizeStatistics { 3: optional list definition_level_histogram; } +/** + * Bounding box for GEOMETRY or GEOGRAPHY type in the representation of min/max + * value pair of coordinates from each axis. 
+ */ +struct BoundingBox { + 1: required double xmin; + 2: required double xmax; + 3: required double ymin; + 4: required double ymax; + 5: optional double zmin; + 6: optional double zmax; + 7: optional double mmin; + 8: optional double mmax; +} + +/** Statistics specific to Geometry and Geography logical types */ +struct GeospatialStatistics { + /** A bounding box of geospatial instances */ + 1: optional BoundingBox bbox; + /** Geospatial type codes of all instances, or an empty list if not known */ + 2: optional list geospatial_types; +} + /** * Statistics per row group and per page * All fields are optional. @@ -258,7 +281,14 @@ struct Statistics { */ 1: optional binary max; 2: optional binary min; - /** count of null value in the column */ + /** + * Count of null values in the column. + * + * Writers SHOULD always write this field even if it is zero (i.e. no null value) + * or the column is not nullable. + * Readers MUST distinguish between null_count not being present and null_count == 0. + * If null_count is not present, readers MUST NOT assume null_count == 0. 
+ */ 3: optional i64 null_count; /** count of distinct values occurring */ 4: optional i64 distinct_count; @@ -283,11 +313,11 @@ struct Statistics { } /** Empty structs to use as logical type annotations */ -struct StringType {} // allowed for BINARY, must be encoded with UTF-8 +struct StringType {} // allowed for BYTE_ARRAY, must be encoded with UTF-8 struct UUIDType {} // allowed for FIXED[16], must encoded raw UUID bytes struct MapType {} // see LogicalTypes.md struct ListType {} // see LogicalTypes.md -struct EnumType {} // allowed for BINARY, must be encoded with UTF-8 +struct EnumType {} // allowed for BYTE_ARRAY, must be encoded with UTF-8 struct DateType {} // allowed for INT32 struct Float16Type {} // allowed for FIXED[2], must encoded raw FLOAT16 bytes @@ -309,7 +339,7 @@ struct NullType {} // allowed for any physical type, only null values stored * To maintain forward-compatibility in v1, implementations using this logical * type must also set scale and precision on the annotated SchemaElement. * - * Allowed for physical types: INT32, INT64, FIXED, and BINARY + * Allowed for physical types: INT32, INT64, FIXED_LEN_BYTE_ARRAY, and BYTE_ARRAY. 
*/ struct DecimalType { 1: required i32 scale @@ -361,7 +391,7 @@ struct IntType { /** * Embedded JSON logical type annotation * - * Allowed for physical types: BINARY + * Allowed for physical types: BYTE_ARRAY */ struct JsonType { } @@ -369,11 +399,66 @@ struct JsonType { /** * Embedded BSON logical type annotation * - * Allowed for physical types: BINARY + * Allowed for physical types: BYTE_ARRAY */ struct BsonType { } +/** + * Embedded Variant logical type annotation + */ +struct VariantType { +} + +/** Edge interpolation algorithm for Geography logical type */ +enum EdgeInterpolationAlgorithm { + SPHERICAL = 0; + VINCENTY = 1; + THOMAS = 2; + ANDOYER = 3; + KARNEY = 4; +} + +/** + * Embedded Geometry logical type annotation + * + * Geospatial features in the Well-Known Binary (WKB) format and edges interpolation + * is always linear/planar. + * + * A custom CRS can be set by the crs field. If unset, it defaults to "OGC:CRS84", + * which means that the geometries must be stored in longitude, latitude based on + * the WGS84 datum. + * + * Allowed for physical type: BYTE_ARRAY. + * + * See Geospatial.md for details. + */ +struct GeometryType { + 1: optional string crs; +} + +/** + * Embedded Geography logical type annotation + * + * Geospatial features in the WKB format with an explicit (non-linear/non-planar) + * edges interpolation algorithm. + * + * A custom geographic CRS can be set by the crs field, where longitudes are + * bound by [-180, 180] and latitudes are bound by [-90, 90]. If unset, the CRS + * defaults to "OGC:CRS84". + * + * An optional algorithm can be set to correctly interpret edges interpolation + * of the geometries. If unset, the algorithm defaults to SPHERICAL. + * + * Allowed for physical type: BYTE_ARRAY. + * + * See Geospatial.md for details. + */ +struct GeographyType { + 1: optional string crs; + 2: optional EdgeInterpolationAlgorithm algorithm; +} + /** * LogicalType annotations to replace ConvertedType. 
* @@ -404,6 +489,9 @@ union LogicalType { 13: BsonType BSON // use ConvertedType BSON 14: UUIDType UUID // no compatible ConvertedType 15: Float16Type FLOAT16 // no compatible ConvertedType + 16: VariantType VARIANT // no compatible ConvertedType + 17: GeometryType GEOMETRY // no compatible ConvertedType + 18: GeographyType GEOGRAPHY // no compatible ConvertedType } /** @@ -527,12 +615,15 @@ enum Encoding { */ RLE_DICTIONARY = 8; - /** Encoding for floating-point data. + /** Encoding for fixed-width data (FLOAT, DOUBLE, INT32, INT64, FIXED_LEN_BYTE_ARRAY). K byte-streams are created where K is the size in bytes of the data type. - The individual bytes of an FP value are scattered to the corresponding stream and + The individual bytes of a value are scattered to the corresponding stream and the streams are concatenated. This itself does not reduce the size of the data but can lead to better compression afterwards. + + Added in 2.8 for FLOAT and DOUBLE. + Support for INT32, INT64 and FIXED_LEN_BYTE_ARRAY added in 2.11. */ BYTE_STREAM_SPLIT = 9; } @@ -576,7 +667,13 @@ enum BoundaryOrder { /** Data page header */ struct DataPageHeader { - /** Number of values, including NULLs, in this data page. **/ + /** + * Number of values, including NULLs, in this data page. + * + * If a OffsetIndex is present, a page must begin at a row + * boundary (repetition_level = 0). Otherwise, pages may begin + * within a row (repetition_level > 0). + **/ 1: required i32 num_values /** Encoding used for this data page **/ @@ -623,7 +720,11 @@ struct DataPageHeaderV2 { /** Number of NULL values, in this data page. Number of non-null = num_values - num_nulls which is also the number of values in the data section **/ 2: required i32 num_nulls - /** Number of rows in this data page. which means pages change on record boundaries (r = 0) **/ + /** + * Number of rows in this data page. 
Every page must begin at a + * row boundary (repetition_level = 0): rows must **not** be + * split across page boundaries when using V2 data pages. + **/ 3: required i32 num_rows /** Encoding used for data in this page **/ 4: required Encoding encoding @@ -736,10 +837,10 @@ struct PageHeader { } /** - * Wrapper struct to specify sort order + * Sort order within a RowGroup of a leaf column */ struct SortingColumn { - /** The column index (in this row group) **/ + /** The ordinal position of the column (in this row group) **/ 1: required i32 column_idx /** If true, indicates this column is sorted in descending order. **/ @@ -789,7 +890,7 @@ struct ColumnMetaData { /** total byte size of all uncompressed pages in this column chunk (including the headers) **/ 6: required i64 total_uncompressed_size - /** total byte size of all compressed, and potentially encrypted, pages + /** total byte size of all compressed, and potentially encrypted, pages * in this column chunk (including the headers) **/ 7: required i64 total_compressed_size @@ -831,6 +932,9 @@ struct ColumnMetaData { * filter pushdown. */ 16: optional SizeStatistics size_statistics; + + /** Optional statistics specific for Geometry and Geography logical types */ + 17: optional GeospatialStatistics geospatial_statistics; } struct EncryptionWithFooterKey { @@ -855,12 +959,21 @@ struct ColumnChunk { **/ 1: optional string file_path - /** Byte offset in file_path to the ColumnMetaData **/ - 2: required i64 file_offset + /** Deprecated: Byte offset in file_path to the ColumnMetaData + * + * Past use of this field has been inconsistent, with some implementations + * using it to point to the ColumnMetaData and some using it to point to + * the first page in the column chunk. In many cases, the ColumnMetaData at this + * location is wrong. This field is now deprecated and should not be used. + * Writers should set this field to 0 if no ColumnMetaData has been written outside + * the footer. 
+ */ + 2: required i64 file_offset = 0 - /** Column metadata for this chunk. This is the same content as what is at - * file_path/file_offset. Having it here has it replicated in the file - * metadata. + /** Column metadata for this chunk. Some writers may also replicate this at the + * location pointed to by file_path/file_offset. + * Note: while marked as optional, this field is in fact required by most major + * Parquet implementations. As such, writers MUST populate this field. **/ 3: optional ColumnMetaData meta_data @@ -904,10 +1017,10 @@ struct RowGroup { * in this row group **/ 5: optional i64 file_offset - /** Total byte size of all compressed (and potentially encrypted) column data + /** Total byte size of all compressed (and potentially encrypted) column data * in this row group **/ 6: optional i64 total_compressed_size - + /** Row group ordinal in the file **/ 7: optional i16 ordinal } @@ -946,12 +1059,15 @@ union ColumnOrder { * TIME_MICROS - signed comparison * TIMESTAMP_MILLIS - signed comparison * TIMESTAMP_MICROS - signed comparison - * INTERVAL - unsigned comparison + * INTERVAL - undefined * JSON - unsigned byte-wise comparison * BSON - unsigned byte-wise comparison * ENUM - unsigned byte-wise comparison * LIST - undefined * MAP - undefined + * VARIANT - undefined + * GEOMETRY - undefined + * GEOGRAPHY - undefined * * In the absence of logical types, the sort order is determined by the physical type: * BOOLEAN - false, true @@ -971,7 +1087,7 @@ union ColumnOrder { * - If the min is +0, the row group may contain -0 values as well. * - If the max is -0, the row group may contain +0 values as well. * - When looking for NaN values, min and max should be ignored. - * + * * When writing statistics the following rules should be followed: * - NaNs should not be written to min or max statistics fields. 
* - If the computed max value is zero (whether negative or positive), @@ -993,12 +1109,20 @@ struct PageLocation { 2: required i32 compressed_page_size /** - * Index within the RowGroup of the first row of the page; this means pages - * change on record boundaries (r = 0). + * Index within the RowGroup of the first row of the page. When an + * OffsetIndex is present, pages must begin on row boundaries + * (repetition_level = 0). */ 3: required i64 first_row_index } +/** + * Optional offsets for each data page in a ColumnChunk. + * + * Forms part of the page index, along with ColumnIndex. + * + * OffsetIndex may be present even if ColumnIndex is not. + */ struct OffsetIndex { /** * PageLocations, ordered by increasing PageLocation.offset. It is required @@ -1015,8 +1139,14 @@ struct OffsetIndex { } /** - * Description for ColumnIndex. - * Each [i] refers to the page at OffsetIndex.page_locations[i] + * Optional statistics for each data page in a ColumnChunk. + * + * Forms part the page index, along with OffsetIndex. + * + * If this structure is present, OffsetIndex must also be present. + * + * For each field in this structure, [i] refers to the page at + * OffsetIndex.page_locations[i] */ struct ColumnIndex { /** @@ -1049,7 +1179,16 @@ struct ColumnIndex { */ 4: required BoundaryOrder boundary_order - /** A list containing the number of null values for each page **/ + /** + * A list containing the number of null values for each page + * + * Writers SHOULD always write this field even if no null values + * are present or the column is not nullable. + * Readers MUST distinguish between null_counts not being present + * and null_count being 0. + * If null_counts are not present, readers MUST NOT assume all + * null counts are 0. + */ 5: optional list null_counts /** @@ -1069,7 +1208,6 @@ struct ColumnIndex { * Same as repetition_level_histograms except for definitions levels. 
**/ 7: optional list definition_level_histograms; - } struct AesGcmV1 { @@ -1149,31 +1287,30 @@ struct FileMetaData { */ 7: optional list column_orders; - /** + /** * Encryption algorithm. This field is set only in encrypted files * with plaintext footer. Files with encrypted footer store algorithm id * in FileCryptoMetaData structure. */ 8: optional EncryptionAlgorithm encryption_algorithm - /** - * Retrieval metadata of key used for signing the footer. - * Used only in encrypted files with plaintext footer. - */ + /** + * Retrieval metadata of key used for signing the footer. + * Used only in encrypted files with plaintext footer. + */ 9: optional binary footer_signing_key_metadata } /** Crypto metadata for files with encrypted footer **/ struct FileCryptoMetaData { - /** + /** * Encryption algorithm. This field is only used for files * with encrypted footer. Files with plaintext footer store algorithm id * inside footer (FileMetaData structure). */ 1: required EncryptionAlgorithm encryption_algorithm - - /** Retrieval metadata of key used for encryption of footer, + + /** Retrieval metadata of key used for encryption of footer, * and (possibly) columns **/ 2: optional binary key_metadata } - diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h index 19436b84a379b..56c0bc7f25021 100644 --- a/cpp/src/parquet/properties.h +++ b/cpp/src/parquet/properties.h @@ -1038,6 +1038,7 @@ class PARQUET_EXPORT ArrowWriterProperties { store_schema_(false), compliant_nested_types_(true), engine_version_(V2), + write_geospatial_logical_types_(false), use_threads_(kArrowDefaultUseThreads), executor_(NULLPTR) {} virtual ~Builder() = default; @@ -1112,6 +1113,12 @@ class PARQUET_EXPORT ArrowWriterProperties { return this; } + /// Write GEOMETRY and GEOGRAPHY logical types where possible. 
+ Builder* write_geospatial_logical_types() { + write_geospatial_logical_types_ = true; + return this; + } + /// \brief Set whether to use multiple threads to write columns /// in parallel in the buffered row group mode. /// @@ -1139,7 +1146,7 @@ class PARQUET_EXPORT ArrowWriterProperties { return std::shared_ptr(new ArrowWriterProperties( write_timestamps_as_int96_, coerce_timestamps_enabled_, coerce_timestamps_unit_, truncated_timestamps_allowed_, store_schema_, compliant_nested_types_, - engine_version_, use_threads_, executor_)); + engine_version_, write_geospatial_logical_types_, use_threads_, executor_)); } private: @@ -1152,6 +1159,7 @@ class PARQUET_EXPORT ArrowWriterProperties { bool store_schema_; bool compliant_nested_types_; EngineVersion engine_version_; + bool write_geospatial_logical_types_; bool use_threads_; ::arrow::internal::Executor* executor_; @@ -1181,6 +1189,9 @@ class PARQUET_EXPORT ArrowWriterProperties { /// place in case there are bugs detected in V2. EngineVersion engine_version() const { return engine_version_; } + /// \brief Write GEOMETRY and/or GEOGRAPHY logical types when converting GeoArrow types + bool write_geospatial_logical_types() const { return write_geospatial_logical_types_; } + /// \brief Returns whether the writer will use multiple threads /// to write columns in parallel in the buffered row group mode. 
bool use_threads() const { return use_threads_; } @@ -1194,7 +1205,8 @@ class PARQUET_EXPORT ArrowWriterProperties { ::arrow::TimeUnit::type coerce_timestamps_unit, bool truncated_timestamps_allowed, bool store_schema, bool compliant_nested_types, - EngineVersion engine_version, bool use_threads, + EngineVersion engine_version, + bool write_geospataial_logical_types, bool use_threads, ::arrow::internal::Executor* executor) : write_timestamps_as_int96_(write_nanos_as_int96), coerce_timestamps_enabled_(coerce_timestamps_enabled), @@ -1203,6 +1215,7 @@ class PARQUET_EXPORT ArrowWriterProperties { store_schema_(store_schema), compliant_nested_types_(compliant_nested_types), engine_version_(engine_version), + write_geospatial_logical_types_(write_geospataial_logical_types), use_threads_(use_threads), executor_(executor) {} @@ -1213,6 +1226,7 @@ class PARQUET_EXPORT ArrowWriterProperties { const bool store_schema_; const bool compliant_nested_types_; const EngineVersion engine_version_; + const bool write_geospatial_logical_types_; const bool use_threads_; ::arrow::internal::Executor* executor_; }; diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc index 62a971799c2db..4be7ba1ccb859 100644 --- a/cpp/src/parquet/reader_test.cc +++ b/cpp/src/parquet/reader_test.cc @@ -34,6 +34,8 @@ #include #include "arrow/array.h" +#include "arrow/array/array_binary.h" +#include "arrow/array/builder_binary.h" #include "arrow/buffer.h" #include "arrow/io/file.h" #include "arrow/testing/future_util.h" @@ -45,13 +47,17 @@ #include "parquet/column_reader.h" #include "parquet/column_scanner.h" +#include "parquet/column_writer.h" #include "parquet/file_reader.h" #include "parquet/file_writer.h" +#include "parquet/geometry_statistics.h" #include "parquet/metadata.h" #include "parquet/page_index.h" #include "parquet/platform.h" #include "parquet/printer.h" +#include "parquet/statistics.h" #include "parquet/test_util.h" +#include "parquet/types.h" namespace rj = 
arrow::rapidjson; @@ -1857,4 +1863,172 @@ TEST(PageIndexReaderTest, ReadFileWithoutPageIndex) { ASSERT_EQ(nullptr, row_group_index_reader); } +class TestGeometryLogicalType : public ::testing::Test { + public: + const int kNumRows = 1000; + + void WriteTestData(ParquetDataPageVersion data_page_version, + bool enable_write_page_index, bool write_arrow) { + // Make schema + schema::NodeVector fields; + fields.push_back(PrimitiveNode::Make("g", Repetition::REQUIRED, + GeometryLogicalType::Make("srid:1234"), + Type::BYTE_ARRAY)); + auto schema = std::static_pointer_cast( + GroupNode::Make("schema", Repetition::REQUIRED, fields)); + + // Write small batches and small data pages + auto writer_props_builder = WriterProperties::Builder(); + writer_props_builder.write_batch_size(64)->data_pagesize(128)->data_page_version( + data_page_version); + if (enable_write_page_index) { + writer_props_builder.enable_write_page_index(); + } + + std::shared_ptr writer_props = writer_props_builder.build(); + + ASSERT_OK_AND_ASSIGN(auto out_file, ::arrow::io::BufferOutputStream::Create()); + std::shared_ptr file_writer = + ParquetFileWriter::Open(out_file, schema, writer_props); + RowGroupWriter* rg_writer = file_writer->AppendRowGroup(); + + // write WKB points to columns + auto* writer = static_cast(rg_writer->NextColumn()); + if (!write_arrow) { + WriteTestDataUsingWriteBatch(writer); + } else { + WriteTestDataUsingWriteArrow(writer); + } + + rg_writer->Close(); + file_writer->Close(); + + ASSERT_OK_AND_ASSIGN(file_buf, out_file->Finish()); + } + + void WriteTestDataUsingWriteBatch(ByteArrayWriter* writer) { + std::vector buffer(test::kWkbPointSize * kNumRows); + uint8_t* ptr = buffer.data(); + std::vector values(kNumRows); + for (int k = 0; k < kNumRows; k++) { + test::GenerateWKBPoint(ptr, k, k + 1); + values[k].len = test::kWkbPointSize; + values[k].ptr = ptr; + ptr += test::kWkbPointSize; + } + writer->WriteBatch(kNumRows, nullptr, nullptr, values.data()); + } + + void 
WriteTestDataUsingWriteArrow(ByteArrayWriter* writer) { + ::arrow::BinaryBuilder builder; + std::vector buffer(test::kWkbPointSize * kNumRows); + uint8_t* ptr = buffer.data(); + for (int k = 0; k < kNumRows; k++) { + test::GenerateWKBPoint(ptr, k, k + 1); + ASSERT_OK(builder.Append(ptr, test::kWkbPointSize)); + ptr += test::kWkbPointSize; + } + std::shared_ptr<::arrow::BinaryArray> array; + ASSERT_OK(builder.Finish(&array)); + + std::shared_ptr properties = + ArrowWriterProperties::Builder().build(); + MemoryPool* pool = ::arrow::default_memory_pool(); + auto ctx = std::make_unique(pool, properties.get()); + ASSERT_OK(writer->WriteArrow(nullptr, nullptr, kNumRows, *array, ctx.get(), true)); + } + + void TestWriteAndRead(ParquetDataPageVersion data_page_version, + bool enable_write_page_index, bool write_arrow) { + WriteTestData(data_page_version, enable_write_page_index, write_arrow); + + auto in_file = std::make_shared<::arrow::io::BufferReader>(file_buf); + + ReaderProperties reader_props; + reader_props.enable_buffered_stream(); + reader_props.set_buffer_size(64); + auto file_reader = ParquetFileReader::Open(in_file, reader_props); + + // Check that the geometry statistics are correctly written and read + auto metadata = file_reader->metadata(); + auto page_index_reader = file_reader->GetPageIndexReader(); + int num_row_groups = metadata->num_row_groups(); + for (int i = 0; i < num_row_groups; i++) { + auto row_group_metadata = metadata->RowGroup(i); + auto column_chunk_metadata = row_group_metadata->ColumnChunk(0); + auto geometry_stats = column_chunk_metadata->geometry_statistics(); + CheckGeospatialStatistics(geometry_stats); + } + + // Check the geometry values + int64_t total_values_read = 0; + for (int i = 0; i < num_row_groups; i++) { + auto row_group = file_reader->RowGroup(i); + std::shared_ptr reader = + std::static_pointer_cast(row_group->Column(0)); + while (reader->HasNext()) { + std::vector out(kNumRows); + int64_t values_read = 0; + int64_t 
levels_read = + reader->ReadBatch(kNumRows, nullptr, nullptr, out.data(), &values_read); + ASSERT_GE(levels_read, 1); + ASSERT_GE(values_read, 1); + + // Check the batch + for (int64_t i = 0; i < values_read; i++) { + const ByteArray& value = out[i]; + double x = 0; + double y = 0; + EXPECT_TRUE(test::GetWKBPointCoordinate(value, &x, &y)); + auto expected_x = static_cast(i + total_values_read); + auto expected_y = static_cast(i + 1 + total_values_read); + EXPECT_DOUBLE_EQ(expected_x, x); + EXPECT_DOUBLE_EQ(expected_y, y); + } + + total_values_read += values_read; + } + } + EXPECT_EQ(kNumRows, total_values_read); + } + + void CheckGeospatialStatistics(std::shared_ptr geom_stats) { + ASSERT_TRUE(geom_stats != nullptr); + std::vector geospatial_types = geom_stats->GetGeometryTypes(); + EXPECT_EQ(1, geospatial_types.size()); + EXPECT_EQ(1, geospatial_types[0]); + EXPECT_GE(geom_stats->GetXMin(), 0); + EXPECT_GT(geom_stats->GetXMax(), geom_stats->GetXMin()); + EXPECT_GT(geom_stats->GetYMin(), 0); + EXPECT_GT(geom_stats->GetYMax(), geom_stats->GetYMin()); + EXPECT_FALSE(geom_stats->HasZ()); + EXPECT_FALSE(geom_stats->HasM()); + } + + protected: + std::shared_ptr file_buf; +}; + +TEST_F(TestGeometryLogicalType, TestWriteAndReadWithPageStatistics) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndRead(data_page_version, false, false); + } +} + +TEST_F(TestGeometryLogicalType, TestWriteAndReadWithColumnIndex) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndRead(data_page_version, true, false); + } +} + +TEST_F(TestGeometryLogicalType, TestWriteArrowAndRead) { + for (auto data_page_version : + {ParquetDataPageVersion::V1, ParquetDataPageVersion::V2}) { + TestWriteAndRead(data_page_version, false, true); + TestWriteAndRead(data_page_version, true, true); + } +} + } // namespace parquet diff --git a/cpp/src/parquet/schema.cc b/cpp/src/parquet/schema.cc 
index 47fa72d829658..5ee18d730abb5 100644 --- a/cpp/src/parquet/schema.cc +++ b/cpp/src/parquet/schema.cc @@ -28,6 +28,7 @@ #include "parquet/exception.h" #include "parquet/schema_internal.h" #include "parquet/thrift_internal.h" +#include "parquet/windows_fixup.h" using parquet::format::SchemaElement; diff --git a/cpp/src/parquet/schema_test.cc b/cpp/src/parquet/schema_test.cc index 2532a8656e69f..1ad82a0a1f30e 100644 --- a/cpp/src/parquet/schema_test.cc +++ b/cpp/src/parquet/schema_test.cc @@ -1544,6 +1544,31 @@ TEST(TestLogicalTypeOperation, LogicalTypeRepresentation) { {LogicalType::BSON(), "BSON", R"({"Type": "BSON"})"}, {LogicalType::UUID(), "UUID", R"({"Type": "UUID"})"}, {LogicalType::Float16(), "Float16", R"({"Type": "Float16"})"}, + {LogicalType::Geometry(), "Geometry(crs=)", R"({"Type": "Geometry"})"}, + {LogicalType::Geometry("srid:1234"), "Geometry(crs=srid:1234)", + R"({"Type": "Geometry", "crs": "srid:1234"})"}, + {LogicalType::Geography(), "Geography(crs=, algorithm=spherical)", + R"({"Type": "Geography"})"}, + {LogicalType::Geography("srid:1234", + LogicalType::EdgeInterpolationAlgorithm::SPHERICAL), + "Geography(crs=srid:1234, algorithm=spherical)", + R"({"Type": "Geography", "crs": "srid:1234"})"}, + {LogicalType::Geography("srid:1234", + LogicalType::EdgeInterpolationAlgorithm::VINCENTY), + "Geography(crs=srid:1234, algorithm=vincenty)", + R"({"Type": "Geography", "crs": "srid:1234", "algorithm": "vincenty"})"}, + {LogicalType::Geography("srid:1234", + LogicalType::EdgeInterpolationAlgorithm::THOMAS), + "Geography(crs=srid:1234, algorithm=thomas)", + R"({"Type": "Geography", "crs": "srid:1234", "algorithm": "thomas"})"}, + {LogicalType::Geography("srid:1234", + LogicalType::EdgeInterpolationAlgorithm::ANDOYER), + "Geography(crs=srid:1234, algorithm=andoyer)", + R"({"Type": "Geography", "crs": "srid:1234", "algorithm": "andoyer"})"}, + {LogicalType::Geography("srid:1234", + LogicalType::EdgeInterpolationAlgorithm::KARNEY), + 
"Geography(crs=srid:1234, algorithm=karney)", + R"({"Type": "Geography", "crs": "srid:1234", "algorithm": "karney"})"}, {LogicalType::None(), "None", R"({"Type": "None"})"}, }; @@ -1594,6 +1619,8 @@ TEST(TestLogicalTypeOperation, LogicalTypeSortOrder) { {LogicalType::BSON(), SortOrder::UNSIGNED}, {LogicalType::UUID(), SortOrder::UNSIGNED}, {LogicalType::Float16(), SortOrder::SIGNED}, + {LogicalType::Geometry(), SortOrder::UNKNOWN}, + {LogicalType::Geography(), SortOrder::UNKNOWN}, {LogicalType::None(), SortOrder::UNKNOWN}}; for (const ExpectedSortOrder& c : cases) { @@ -2265,6 +2292,24 @@ TEST(TestLogicalTypeSerialization, Roundtrips) { {LogicalType::BSON(), Type::BYTE_ARRAY, -1}, {LogicalType::UUID(), Type::FIXED_LEN_BYTE_ARRAY, 16}, {LogicalType::Float16(), Type::FIXED_LEN_BYTE_ARRAY, 2}, + {LogicalType::Geometry(), Type::BYTE_ARRAY, -1}, + {LogicalType::Geometry("srid:1234"), Type::BYTE_ARRAY, -1}, + {LogicalType::Geography(), Type::BYTE_ARRAY, -1}, + {LogicalType::Geography("srid:1234", + LogicalType::EdgeInterpolationAlgorithm::SPHERICAL), + Type::BYTE_ARRAY, -1}, + {LogicalType::Geography("srid:1234", + LogicalType::EdgeInterpolationAlgorithm::VINCENTY), + Type::BYTE_ARRAY, -1}, + {LogicalType::Geography("srid:1234", + LogicalType::EdgeInterpolationAlgorithm::THOMAS), + Type::BYTE_ARRAY, -1}, + {LogicalType::Geography("srid:1234", + LogicalType::EdgeInterpolationAlgorithm::ANDOYER), + Type::BYTE_ARRAY, -1}, + {LogicalType::Geography("srid:1234", + LogicalType::EdgeInterpolationAlgorithm::KARNEY), + Type::BYTE_ARRAY, -1}, {LogicalType::None(), Type::BOOLEAN, -1}}; for (const AnnotatedPrimitiveNodeFactoryArguments& c : cases) { diff --git a/cpp/src/parquet/test_util.h b/cpp/src/parquet/test_util.h index 59728cf53f699..bba0e86e31b81 100644 --- a/cpp/src/parquet/test_util.h +++ b/cpp/src/parquet/test_util.h @@ -26,11 +26,11 @@ #include #include #include -#include #include #include +#include "arrow/extension_type.h" #include "arrow/io/memory.h" #include 
"arrow/testing/util.h" #include "arrow/util/float16.h" @@ -39,6 +39,7 @@ #include "parquet/column_reader.h" #include "parquet/column_writer.h" #include "parquet/encoding.h" +#include "parquet/geometry_util_internal.h" #include "parquet/platform.h" // https://github.com/google/googletest/pull/2904 might not be available @@ -660,7 +661,7 @@ class PrimitiveTypedTest : public ::testing::Test { public: using c_type = typename TestType::c_type; - void SetUpSchema(Repetition::type repetition, int num_columns = 1) { + virtual void SetUpSchema(Repetition::type repetition, int num_columns) { std::vector fields; for (int i = 0; i < num_columns; ++i) { @@ -672,6 +673,8 @@ class PrimitiveTypedTest : public ::testing::Test { schema_.Init(node_); } + void SetUpSchema(Repetition::type repetition) { this->SetUpSchema(repetition, 1); } + void GenerateData(int64_t num_values, uint32_t seed = 0); void SetupValuesOut(int64_t num_values); void SyncValuesOut(); @@ -830,5 +833,124 @@ inline void GenerateData(int num_values, FLBA* out, std::vector* random_fixed_byte_array(num_values, 0, heap->data(), kGenerateDataFLBALength, out); } +// ---------------------------------------------------------------------- +// Test utility functions for geometry + +#if defined(ARROW_LITTLE_ENDIAN) +static constexpr uint8_t kWkbNativeEndianness = 0x01; +#else +static constexpr uint8_t kWkbNativeEndianness = 0x00; +#endif + +static uint32_t GeometryTypeToWKB(geometry::GeometryType geometry_type, bool has_z, + bool has_m) { + auto wkb_geom_type = static_cast(geometry_type); + + if (has_z) { + wkb_geom_type += 1000; + } + + if (has_m) { + wkb_geom_type += 2000; + } + + return wkb_geom_type; +} + +static inline std::string MakeWKBPoint(const double* xyzm, bool has_z, bool has_m) { + // 1:endianness + 4:type + 8:x + 8:y + int num_bytes = 21 + (has_z ? 8 : 0) + (has_m ? 
8 : 0); + std::string wkb(num_bytes, 0); + char* ptr = wkb.data(); + + ptr[0] = kWkbNativeEndianness; + uint32_t geom_type = GeometryTypeToWKB(geometry::GeometryType::POINT, has_z, has_m); + std::memcpy(&ptr[1], &geom_type, 4); + std::memcpy(&ptr[5], &xyzm[0], 8); + std::memcpy(&ptr[13], &xyzm[1], 8); + ptr += 21; + + if (has_z) { + std::memcpy(ptr, &xyzm[2], 8); + ptr += 8; + } + if (has_m) { + std::memcpy(ptr, &xyzm[3], 8); + } + + return wkb; +} + +static constexpr int kWkbPointSize = 21; // 1:endianness + 4:type + 8:x + 8:y + +inline void GenerateWKBPoint(uint8_t* ptr, double x, double y) { + double xyzm[] = {x, y, geometry::kInf, geometry::kInf}; + std::string wkb = MakeWKBPoint(xyzm, false, false); + std::memcpy(ptr, wkb.data(), kWkbPointSize); +} + +inline bool GetWKBPointCoordinate(const ByteArray& value, double* out_x, double* out_y) { + if (value.len != kWkbPointSize) { + return false; + } + if (value.ptr[0] != kWkbNativeEndianness) { + return false; + } + uint32_t expected_geom_type = + GeometryTypeToWKB(geometry::GeometryType::POINT, false, false); + uint32_t geom_type = 0; + memcpy(&geom_type, &value.ptr[1], 4); + if (geom_type != expected_geom_type) { + return false; + } + memcpy(out_x, &value.ptr[5], 8); + memcpy(out_y, &value.ptr[13], 8); + return true; +} + +// A minimal version of a geoarrow.wkb extension type to test interoperability +class GeoArrowWkbExtensionType : public ::arrow::ExtensionType { + public: + explicit GeoArrowWkbExtensionType(std::shared_ptr<::arrow::DataType> storage_type, + std::string metadata) + : ::arrow::ExtensionType(std::move(storage_type)), metadata_(std::move(metadata)) {} + + std::string extension_name() const override { return "geoarrow.wkb"; } + + std::string Serialize() const override { return metadata_; } + + ::arrow::Result> Deserialize( + std::shared_ptr<::arrow::DataType> storage_type, + const std::string& serialized_data) const override { + return std::make_shared(std::move(storage_type), + serialized_data); + 
} + + std::shared_ptr<::arrow::Array> MakeArray( + std::shared_ptr<::arrow::ArrayData> data) const override { + return std::make_shared<::arrow::ExtensionArray>(data); + } + + bool ExtensionEquals(const ExtensionType& other) const override { + return other.extension_name() == extension_name() && other.Serialize() == Serialize(); + } + + private: + std::string metadata_; +}; + +inline std::shared_ptr<::arrow::DataType> geoarrow_wkb( + std::string metadata = "{}", + const std::shared_ptr<::arrow::DataType> storage = ::arrow::binary()) { + return std::make_shared(storage, std::move(metadata)); +} + +inline std::shared_ptr<::arrow::DataType> geoarrow_wkb_lonlat( + const std::shared_ptr<::arrow::DataType> storage = ::arrow::binary()) { + // There are other ways to express lon/lat output, but this is the one that will + // roundtrip into Parquet and back + return geoarrow_wkb(R"({"crs": "OGC:CRS84", "crs_type": "authority_code"})", storage); +} + } // namespace test } // namespace parquet diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index 744af743118e2..0ccf8baa0ccce 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -41,6 +41,7 @@ #include "parquet/encryption/internal_file_decryptor.h" #include "parquet/encryption/internal_file_encryptor.h" #include "parquet/exception.h" +#include "parquet/geometry_statistics.h" #include "parquet/platform.h" #include "parquet/properties.h" #include "parquet/size_statistics.h" @@ -232,6 +233,29 @@ static inline AadMetadata FromThrift(format::AesGcmCtrV1 aesGcmCtrV1) { aesGcmCtrV1.supply_aad_prefix}; } +static inline EncodedGeospatialStatistics FromThrift( + const format::GeospatialStatistics& geometry_stats) { + EncodedGeospatialStatistics out; + + out.geospatial_types = geometry_stats.geospatial_types; + out.xmin = geometry_stats.bbox.xmin; + out.xmax = geometry_stats.bbox.xmax; + out.ymin = geometry_stats.bbox.ymin; + out.ymax = geometry_stats.bbox.ymax; + + 
if (geometry_stats.bbox.__isset.zmin && geometry_stats.bbox.__isset.zmax) { + out.zmin = geometry_stats.bbox.zmin; + out.zmax = geometry_stats.bbox.zmax; + } + + if (geometry_stats.bbox.__isset.mmin && geometry_stats.bbox.__isset.mmax) { + out.mmin = geometry_stats.bbox.mmin; + out.mmax = geometry_stats.bbox.mmax; + } + + return out; +} + static inline EncryptionAlgorithm FromThrift(format::EncryptionAlgorithm encryption) { EncryptionAlgorithm encryption_algorithm; @@ -332,6 +356,27 @@ static inline format::SortingColumn ToThrift(SortingColumn sorting_column) { return thrift_sorting_column; } +static inline format::GeospatialStatistics ToThrift( + const EncodedGeospatialStatistics& encoded_geometry_stats) { + format::GeospatialStatistics geometry_statistics; + geometry_statistics.__set_geospatial_types(encoded_geometry_stats.geospatial_types); + format::BoundingBox bbox; + bbox.__set_xmin(encoded_geometry_stats.xmin); + bbox.__set_xmax(encoded_geometry_stats.xmax); + bbox.__set_ymin(encoded_geometry_stats.ymin); + bbox.__set_ymax(encoded_geometry_stats.ymax); + if (encoded_geometry_stats.has_z()) { + bbox.__set_zmin(encoded_geometry_stats.zmin); + bbox.__set_zmax(encoded_geometry_stats.zmax); + } + if (encoded_geometry_stats.has_m()) { + bbox.__set_mmin(encoded_geometry_stats.mmin); + bbox.__set_mmax(encoded_geometry_stats.mmax); + } + geometry_statistics.__set_bbox(bbox); + return geometry_statistics; +} + static inline format::Statistics ToThrift(const EncodedStatistics& stats) { format::Statistics statistics; if (stats.has_min) { diff --git a/cpp/src/parquet/types.cc b/cpp/src/parquet/types.cc index bee75c335afd5..a4d27f9e18ecb 100644 --- a/cpp/src/parquet/types.cc +++ b/cpp/src/parquet/types.cc @@ -479,6 +479,38 @@ std::shared_ptr LogicalType::FromThrift( return UUIDLogicalType::Make(); } else if (type.__isset.FLOAT16) { return Float16LogicalType::Make(); + } else if (type.__isset.GEOMETRY) { + std::string crs; + if (type.GEOMETRY.__isset.crs) { + crs = 
type.GEOMETRY.crs; + } + + return GeometryLogicalType::Make(crs); + } else if (type.__isset.GEOGRAPHY) { + std::string crs; + if (type.GEOGRAPHY.__isset.crs) { + crs = type.GEOGRAPHY.crs; + } + + LogicalType::EdgeInterpolationAlgorithm::algorithm algorithm = + LogicalType::EdgeInterpolationAlgorithm::UNKNOWN; + if (!type.GEOGRAPHY.__isset.algorithm || + type.GEOGRAPHY.algorithm == format::EdgeInterpolationAlgorithm::SPHERICAL) { + algorithm = LogicalType::EdgeInterpolationAlgorithm::SPHERICAL; + } else if (type.GEOGRAPHY.algorithm == format::EdgeInterpolationAlgorithm::VINCENTY) { + algorithm = LogicalType::EdgeInterpolationAlgorithm::VINCENTY; + } else if (type.GEOGRAPHY.algorithm == format::EdgeInterpolationAlgorithm::THOMAS) { + algorithm = LogicalType::EdgeInterpolationAlgorithm::THOMAS; + } else if (type.GEOGRAPHY.algorithm == format::EdgeInterpolationAlgorithm::ANDOYER) { + algorithm = LogicalType::EdgeInterpolationAlgorithm::ANDOYER; + } else if (type.GEOGRAPHY.algorithm == format::EdgeInterpolationAlgorithm::KARNEY) { + algorithm = LogicalType::EdgeInterpolationAlgorithm::KARNEY; + } else { + throw ParquetException("Unknown value for geometry algorithm: ", + type.GEOGRAPHY.algorithm); + } + + return GeographyLogicalType::Make(crs, algorithm); } else { throw ParquetException("Metadata contains Thrift LogicalType that is not recognized"); } @@ -536,6 +568,15 @@ std::shared_ptr LogicalType::Float16() { return Float16LogicalType::Make(); } +std::shared_ptr LogicalType::Geometry(std::string crs) { + return GeometryLogicalType::Make(std::move(crs)); +} + +std::shared_ptr LogicalType::Geography( + std::string crs, LogicalType::EdgeInterpolationAlgorithm::algorithm algorithm) { + return GeographyLogicalType::Make(std::move(crs), algorithm); +} + std::shared_ptr LogicalType::None() { return NoLogicalType::Make(); } /* @@ -618,6 +659,8 @@ class LogicalType::Impl { class BSON; class UUID; class Float16; + class Geometry; + class Geography; class No; class Undefined; 
@@ -690,6 +733,12 @@ bool LogicalType::is_UUID() const { return impl_->type() == LogicalType::Type::U bool LogicalType::is_float16() const { return impl_->type() == LogicalType::Type::FLOAT16; } +bool LogicalType::is_geometry() const { + return impl_->type() == LogicalType::Type::GEOMETRY; +} +bool LogicalType::is_geography() const { + return impl_->type() == LogicalType::Type::GEOGRAPHY; +} bool LogicalType::is_none() const { return impl_->type() == LogicalType::Type::NONE; } bool LogicalType::is_valid() const { return impl_->type() != LogicalType::Type::UNDEFINED; @@ -1619,6 +1668,204 @@ class LogicalType::Impl::Float16 final : public LogicalType::Impl::Incompatible, GENERATE_MAKE(Float16) +class LogicalType::Impl::Geometry final : public LogicalType::Impl::Incompatible, + public LogicalType::Impl::SimpleApplicable { + public: + friend class GeometryLogicalType; + + std::string ToString() const override; + std::string ToJSON() const override; + format::LogicalType ToThrift() const override; + bool Equals(const LogicalType& other) const override; + + const std::string& crs() const { return crs_; } + + private: + explicit Geometry(std::string crs) + : LogicalType::Impl(LogicalType::Type::GEOMETRY, SortOrder::UNKNOWN), + LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY), + crs_(std::move(crs)) {} + + std::string crs_; +}; + +std::string LogicalType::Impl::Geometry::ToString() const { + std::stringstream type; + type << "Geometry(crs=" << crs_ << ")"; + return type.str(); +} + +std::string LogicalType::Impl::Geometry::ToJSON() const { + std::stringstream json; + json << R"({"Type": "Geometry")"; + + if (!crs_.empty()) { + // TODO(paleolimbot): For documented cases the CRS shouldn't contain quotes, + // but we should probably escape the value of crs_ for backslash and quotes + // to be safe. 
+ json << R"(, "crs": ")" << crs_ << R"(")"; + } + + json << "}"; + return json.str(); +} + +format::LogicalType LogicalType::Impl::Geometry::ToThrift() const { + format::LogicalType type; + format::GeometryType geometry_type; + + // Canonically export crs of "" as an unset CRS + if (!crs_.empty()) { + geometry_type.__set_crs(crs_); + } + + type.__set_GEOMETRY(geometry_type); + return type; +} + +bool LogicalType::Impl::Geometry::Equals(const LogicalType& other) const { + if (other.is_geometry()) { + const auto& other_geometry = checked_cast(other); + return crs() == other_geometry.crs(); + } else { + return false; + } +} + +const std::string& GeometryLogicalType::crs() const { + return (dynamic_cast(*impl_)).crs(); +} + +std::shared_ptr GeometryLogicalType::Make(std::string crs) { + auto* logical_type = new GeometryLogicalType(); + logical_type->impl_.reset(new LogicalType::Impl::Geometry(std::move(crs))); + return std::shared_ptr(logical_type); +} + +class LogicalType::Impl::Geography final : public LogicalType::Impl::Incompatible, + public LogicalType::Impl::SimpleApplicable { + public: + friend class GeographyLogicalType; + + std::string ToString() const override; + std::string ToJSON() const override; + format::LogicalType ToThrift() const override; + bool Equals(const LogicalType& other) const override; + + const std::string& crs() const { return crs_; } + LogicalType::EdgeInterpolationAlgorithm::algorithm algorithm() const { + return algorithm_; + } + + const char* algorithm_name() const { + switch (algorithm_) { + case LogicalType::EdgeInterpolationAlgorithm::SPHERICAL: + return "spherical"; + case LogicalType::EdgeInterpolationAlgorithm::VINCENTY: + return "vincenty"; + case LogicalType::EdgeInterpolationAlgorithm::THOMAS: + return "thomas"; + case LogicalType::EdgeInterpolationAlgorithm::ANDOYER: + return "andoyer"; + case LogicalType::EdgeInterpolationAlgorithm::KARNEY: + return "karney"; + default: + return "unknown"; + } + } + + private: +
Geography(std::string crs, LogicalType::EdgeInterpolationAlgorithm::algorithm algorithm) + : LogicalType::Impl(LogicalType::Type::GEOGRAPHY, SortOrder::UNKNOWN), + LogicalType::Impl::SimpleApplicable(parquet::Type::BYTE_ARRAY), + crs_(std::move(crs)), + algorithm_(algorithm) {} + + std::string crs_; + LogicalType::EdgeInterpolationAlgorithm::algorithm algorithm_; +}; + +std::string LogicalType::Impl::Geography::ToString() const { + std::stringstream type; + type << "Geography(crs=" << crs_ << ", algorithm=" << algorithm_name() << ")"; + return type.str(); +} + +std::string LogicalType::Impl::Geography::ToJSON() const { + std::stringstream json; + json << R"({"Type": "Geography")"; + + if (!crs_.empty()) { + // TODO(paleolimbot): For documented cases the CRS shouldn't contain quotes, + // but we should probably escape the value of crs_ for backslash and quotes + // to be safe. + json << R"(, "crs": ")" << crs_ << R"(")"; + } + + if (algorithm_ != LogicalType::EdgeInterpolationAlgorithm::SPHERICAL) { + json << R"(, "algorithm": ")" << algorithm_name() << R"(")"; + } + + json << "}"; + return json.str(); +} + +format::LogicalType LogicalType::Impl::Geography::ToThrift() const { + format::LogicalType type; + format::GeographyType geography_type; + + // Canonically export crs of "" as an unset CRS + if (!crs_.empty()) { + geography_type.__set_crs(crs_); + } + + if (algorithm_ == LogicalType::EdgeInterpolationAlgorithm::SPHERICAL) { + // Canonically export spherical algorithm as unset + } else if (algorithm_ == LogicalType::EdgeInterpolationAlgorithm::VINCENTY) { + geography_type.__set_algorithm(format::EdgeInterpolationAlgorithm::VINCENTY); + } else if (algorithm_ == LogicalType::EdgeInterpolationAlgorithm::THOMAS) { + geography_type.__set_algorithm(format::EdgeInterpolationAlgorithm::THOMAS); + } else if (algorithm_ == LogicalType::EdgeInterpolationAlgorithm::ANDOYER) { + geography_type.__set_algorithm(format::EdgeInterpolationAlgorithm::ANDOYER); + } else if
(algorithm_ == LogicalType::EdgeInterpolationAlgorithm::KARNEY) { + geography_type.__set_algorithm(format::EdgeInterpolationAlgorithm::KARNEY); + } else { + throw ParquetException("Unknown value for geometry algorithm: ", algorithm_); + } + + type.__set_GEOGRAPHY(geography_type); + return type; +} + +bool LogicalType::Impl::Geography::Equals(const LogicalType& other) const { + if (other.is_geography()) { + const auto& other_geography = checked_cast(other); + return crs() == other_geography.crs() && algorithm() == other_geography.algorithm(); + } else { + return false; + } +} + +const std::string& GeographyLogicalType::crs() const { + return (dynamic_cast(*impl_)).crs(); +} + +LogicalType::EdgeInterpolationAlgorithm::algorithm GeographyLogicalType::algorithm() + const { + return (dynamic_cast(*impl_)).algorithm(); +} + +const char* GeographyLogicalType::algorithm_name() const { + return (dynamic_cast(*impl_)).algorithm_name(); +} + +std::shared_ptr GeographyLogicalType::Make( + std::string crs, LogicalType::EdgeInterpolationAlgorithm::algorithm algorithm) { + auto* logical_type = new GeographyLogicalType(); + logical_type->impl_.reset(new LogicalType::Impl::Geography(std::move(crs), algorithm)); + return std::shared_ptr(logical_type); +} + class LogicalType::Impl::No final : public LogicalType::Impl::SimpleCompatible, public LogicalType::Impl::UniversalApplicable { public: diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index 70a1c4b73ec45..be58badb492c0 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -158,6 +158,8 @@ class PARQUET_EXPORT LogicalType { BSON, UUID, FLOAT16, + GEOMETRY, + GEOGRAPHY, NONE // Not a real logical type; should always be last element }; }; @@ -166,6 +168,17 @@ class PARQUET_EXPORT LogicalType { enum unit { UNKNOWN = 0, MILLIS = 1, MICROS, NANOS }; }; + struct EdgeInterpolationAlgorithm { + enum algorithm { + UNKNOWN = 0, + SPHERICAL = 1, + VINCENTY = 2, + THOMAS = 3, + ANDOYER = 4, + KARNEY = 5 + }; + }; 
+ /// \brief If possible, return a logical type equivalent to the given legacy /// converted type (and decimal metadata if applicable). static std::shared_ptr FromConvertedType( @@ -213,6 +226,12 @@ class PARQUET_EXPORT LogicalType { static std::shared_ptr UUID(); static std::shared_ptr Float16(); + static std::shared_ptr Geometry(std::string crs = ""); + + static std::shared_ptr Geography( + std::string crs = "", LogicalType::EdgeInterpolationAlgorithm::algorithm algorithm = + EdgeInterpolationAlgorithm::SPHERICAL); + /// \brief Create a placeholder for when no logical type is specified static std::shared_ptr None(); @@ -266,6 +285,8 @@ class PARQUET_EXPORT LogicalType { bool is_BSON() const; bool is_UUID() const; bool is_float16() const; + bool is_geometry() const; + bool is_geography() const; bool is_none() const; /// \brief Return true if this logical type is of a known type. bool is_valid() const; @@ -446,6 +467,30 @@ class PARQUET_EXPORT Float16LogicalType : public LogicalType { Float16LogicalType() = default; }; +class PARQUET_EXPORT GeometryLogicalType : public LogicalType { + public: + static std::shared_ptr Make(std::string crs = ""); + + const std::string& crs() const; + + private: + GeometryLogicalType() = default; +}; + +class PARQUET_EXPORT GeographyLogicalType : public LogicalType { + public: + static std::shared_ptr Make( + std::string crs = "", LogicalType::EdgeInterpolationAlgorithm::algorithm algorithm = + EdgeInterpolationAlgorithm::SPHERICAL); + + const std::string& crs() const; + LogicalType::EdgeInterpolationAlgorithm::algorithm algorithm() const; + const char* algorithm_name() const; + + private: + GeographyLogicalType() = default; +}; + /// \brief Allowed for any physical type. 
class PARQUET_EXPORT NoLogicalType : public LogicalType { public: diff --git a/geometry.parquet b/geometry.parquet new file mode 100644 index 0000000000000..8ae7078d6bbe2 Binary files /dev/null and b/geometry.parquet differ diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index 863c928591937..9fabf528d1678 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -703,7 +703,7 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): cache_options : pyarrow.CacheOptions, default None Cache options used when pre_buffer is enabled. The default values should be good for most use cases. You may want to adjust these for example if - you have exceptionally high latency to the file system. + you have exceptionally high latency to the file system. thrift_string_size_limit : int, default None If not None, override the maximum total string size allocated when decoding Thrift structures. The default limit should be @@ -720,6 +720,9 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): Parquet file. page_checksum_verification : bool, default False If True, verify the page checksum for each page read from the file. + arrow_extensions_enabled : bool, default False + If True, read Parquet logical types as Arrow Extension Types where possible, + (e.g., JSON arrow.json). 
""" # Avoid mistakingly creating attributes @@ -733,7 +736,8 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): thrift_container_size_limit=None, decryption_config=None, decryption_properties=None, - bint page_checksum_verification=False): + bint page_checksum_verification=False, + bint arrow_extensions_enabled=False): self.init(shared_ptr[CFragmentScanOptions]( new CParquetFragmentScanOptions())) self.use_buffered_stream = use_buffered_stream @@ -752,6 +756,7 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): if decryption_properties is not None: self.decryption_properties = decryption_properties self.page_checksum_verification = page_checksum_verification + self.arrow_extensions_enabled = arrow_extensions_enabled cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp): FragmentScanOptions.init(self, sp) @@ -868,6 +873,14 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): def page_checksum_verification(self, bint page_checksum_verification): self.reader_properties().set_page_checksum_verification(page_checksum_verification) + @property + def arrow_extensions_enabled(self): + return self.arrow_reader_properties().get_arrow_extensions_enabled() + + @arrow_extensions_enabled.setter + def arrow_extensions_enabled(self, bint arrow_extensions_enabled): + self.arrow_reader_properties().set_arrow_extensions_enabled(arrow_extensions_enabled) + def equals(self, ParquetFragmentScanOptions other): """ Parameters @@ -881,11 +894,12 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): attrs = ( self.use_buffered_stream, self.buffer_size, self.pre_buffer, self.cache_options, self.thrift_string_size_limit, self.thrift_container_size_limit, - self.page_checksum_verification) + self.page_checksum_verification, self.arrow_extensions_enabled) other_attrs = ( other.use_buffered_stream, other.buffer_size, other.pre_buffer, other.cache_options, other.thrift_string_size_limit, - other.thrift_container_size_limit, 
other.page_checksum_verification) + other.thrift_container_size_limit, other.page_checksum_verification, + other.arrow_extensions_enabled) return attrs == other_attrs @staticmethod @@ -902,7 +916,8 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): cache_options=self.cache_options, thrift_string_size_limit=self.thrift_string_size_limit, thrift_container_size_limit=self.thrift_container_size_limit, - page_checksum_verification=self.page_checksum_verification + page_checksum_verification=self.page_checksum_verification, + arrow_extensions_enabled=self.arrow_extensions_enabled ) return ParquetFragmentScanOptions._reconstruct, (kwargs,) diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index c17c3b70d7f41..ed3b75ec125da 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -68,6 +68,8 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" nogil: ParquetLogicalType_JSON" parquet::LogicalType::Type::JSON" ParquetLogicalType_BSON" parquet::LogicalType::Type::BSON" ParquetLogicalType_UUID" parquet::LogicalType::Type::UUID" + ParquetLogicalType_GEOMETRY" parquet::LogicalType::Type::GEOMETRY" + ParquetLogicalType_GEOGRAPHY" parquet::LogicalType::Type::GEOGRAPHY" ParquetLogicalType_NONE" parquet::LogicalType::Type::NONE" enum ParquetTimeUnit" parquet::LogicalType::TimeUnit::unit": @@ -76,6 +78,14 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" nogil: ParquetTimeUnit_MICROS" parquet::LogicalType::TimeUnit::MICROS" ParquetTimeUnit_NANOS" parquet::LogicalType::TimeUnit::NANOS" + enum ParquetEdgeInterpolationAlgorithm" parquet::LogicalType::EdgeInterpolationAlgorithm::algorithm": + ParquetEdgeInterpolationAlgorithm_UNKNOWN" parquet::LogicalType::EdgeInterpolationAlgorithm::UNKNOWN" + ParquetEdgeInterpolationAlgorithm_SPHERICAL" parquet::LogicalType::EdgeInterpolationAlgorithm::SPHERICAL" + ParquetEdgeInterpolationAlgorithm_VINCENTY" parquet::LogicalType::EdgeInterpolationAlgorithm::VINCENTY" + 
ParquetEdgeInterpolationAlgorithm_THOMAS" parquet::LogicalType::EdgeInterpolationAlgorithm::THOMAS" + ParquetEdgeInterpolationAlgorithm_ANDOYER" parquet::LogicalType::EdgeInterpolationAlgorithm::ANDOYER" + ParquetEdgeInterpolationAlgorithm_KARNEY" parquet::LogicalType::EdgeInterpolationAlgorithm::KARNEY" + enum ParquetConvertedType" parquet::ConvertedType::type": ParquetConvertedType_NONE" parquet::ConvertedType::NONE" ParquetConvertedType_UTF8" parquet::ConvertedType::UTF8" @@ -168,6 +178,15 @@ cdef extern from "parquet/api/schema.h" namespace "parquet" nogil: c_bool is_adjusted_to_utc() const ParquetTimeUnit time_unit() const + cdef cppclass CParquetGeometryType \ + " parquet::GeometryLogicalType"(CParquetLogicalType): + c_string crs() const + + cdef cppclass CParquetGeographyType \ + " parquet::GeographyLogicalType"(CParquetLogicalType): + c_string crs() const + ParquetEdgeInterpolationAlgorithm algorithm() const + cdef cppclass ColumnDescriptor" parquet::ColumnDescriptor": c_bool Equals(const ColumnDescriptor& other) @@ -305,6 +324,23 @@ cdef extern from "parquet/api/reader.h" namespace "parquet" nogil: int64_t offset int32_t length + cdef cppclass CParquetEncodedGeospatialStatistics" parquet::EncodedGeospatialStatistics": + double xmin + double xmax + double ymin + double ymax + double zmin + double zmax + double mmin + double mmax + vector[int32_t] geospatial_types + + c_bool has_z() const + c_bool has_m() const + + cdef cppclass CParquetGeospatialStatistics" parquet::GeospatialStatistics": + CParquetEncodedGeospatialStatistics Encode() const + cdef cppclass CColumnChunkMetaData" parquet::ColumnChunkMetaData": int64_t file_offset() const const c_string& file_path() const @@ -315,6 +351,8 @@ cdef extern from "parquet/api/reader.h" namespace "parquet" nogil: shared_ptr[ColumnPath] path_in_schema() const bint is_stats_set() const shared_ptr[CStatistics] statistics() const + c_bool is_geometry_stats_set() const + shared_ptr[CParquetGeospatialStatistics] 
geometry_statistics() const ParquetCompression compression() const const vector[ParquetEncoding]& encodings() const c_bool Equals(const CColumnChunkMetaData&) const @@ -405,6 +443,8 @@ cdef extern from "parquet/api/reader.h" namespace "parquet" nogil: CCacheOptions cache_options() const void set_coerce_int96_timestamp_unit(TimeUnit unit) TimeUnit coerce_int96_timestamp_unit() const + void set_arrow_extensions_enabled(c_bool extensions_enabled) + c_bool get_arrow_extensions_enabled() const ArrowReaderProperties default_arrow_reader_properties() @@ -460,6 +500,7 @@ cdef extern from "parquet/api/writer.h" namespace "parquet" nogil: Builder* enable_compliant_nested_types() Builder* disable_compliant_nested_types() Builder* set_engine_version(ArrowWriterEngineVersion version) + Builder* write_geospatial_logical_types() shared_ptr[ArrowWriterProperties] build() c_bool support_deprecated_int96_timestamps() @@ -607,6 +648,7 @@ cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties( writer_engine_version=*, use_compliant_nested_type=*, store_schema=*, + write_geospatial_logical_types=*, ) except * cdef class ParquetSchema(_Weakrefable): @@ -652,6 +694,16 @@ cdef class Statistics(_Weakrefable): self.statistics = statistics self.parent = parent +cdef class GeospatialStatistics(_Weakrefable): + cdef: + CParquetEncodedGeospatialStatistics statistics + ColumnChunkMetaData parent + + cdef inline init(self, const shared_ptr[CParquetGeospatialStatistics]& statistics, + ColumnChunkMetaData parent): + self.statistics = statistics.get().Encode() + self.parent = parent + cdef extern from "parquet/encryption/encryption.h" namespace "parquet" nogil: cdef cppclass CFileDecryptionProperties\ " parquet::FileDecryptionProperties": diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 2fb1e41641f8e..c1f9b74affc69 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -312,6 +312,93 @@ cdef _box_flba(ParquetFLBA val, uint32_t len): 
return cp.PyBytes_FromStringAndSize( val.ptr, len) +cdef class GeospatialStatistics(_Weakrefable): + """Statistics for columns with geospatial data types; the z/m bounds are None unless has_z()/has_m() is true""" + + def __cinit__(self): + pass + + def __repr__(self): + return """{} + geospatial_types: {} + xmin: {} + xmax: {} + ymin: {} + ymax: {} + zmin: {} + zmax: {} + mmin: {} + mmax: {}""".format(object.__repr__(self), + self.geospatial_types, + self.xmin, self.xmax, + self.ymin, self.ymax, + self.zmin, self.zmax, + self.mmin, self.mmax) + + def to_dict(self): + out = { + "geospatial_types": self.geospatial_types, + "xmin": self.xmin, + "xmax": self.xmax, + "ymin": self.ymin, + "ymax": self.ymax, + "zmin": self.zmin, + "zmax": self.zmax, + "mmin": self.mmin, + "mmax": self.mmax + } + + return out + + @property + def geospatial_types(self): + return list(self.statistics.geospatial_types) + + @property + def xmin(self): + return self.statistics.xmin + + @property + def xmax(self): + return self.statistics.xmax + + @property + def ymin(self): + return self.statistics.ymin + + @property + def ymax(self): + return self.statistics.ymax + + @property + def zmin(self): + if self.statistics.has_z(): + return self.statistics.zmin + else: + return None + + @property + def zmax(self): + if self.statistics.has_z(): + return self.statistics.zmax + else: + return None + + @property + def mmin(self): + if self.statistics.has_m(): + return self.statistics.mmin + else: + return None + + @property + def mmax(self): + if self.statistics.has_m(): + return self.statistics.mmax + else: + return None + + +cdef class ColumnChunkMetaData(_Weakrefable): """Column metadata for a single row group.""" @@ -320,6 +407,7 @@ cdef class ColumnChunkMetaData(_Weakrefable): def __repr__(self): statistics = indent(repr(self.statistics), 4 * ' ') + geospatial_statistics = indent(repr(self.geospatial_statistics), 4 * ' ') return """{0} file_offset: {1} file_path: {2} @@ -329,13 +417,15 @@ cdef class ColumnChunkMetaData(_Weakrefable): is_stats_set: {6}
statistics: {7} - compression: {8} - encodings: {9} - has_dictionary_page: {10} - dictionary_page_offset: {11} - data_page_offset: {12} - total_compressed_size: {13} - total_uncompressed_size: {14}""".format(object.__repr__(self), + geospatial_statistics: +{8} + compression: {9} + encodings: {10} + has_dictionary_page: {11} + dictionary_page_offset: {12} + data_page_offset: {13} + total_compressed_size: {14} + total_uncompressed_size: {15}""".format(object.__repr__(self), self.file_offset, self.file_path, self.physical_type, @@ -343,6 +433,7 @@ cdef class ColumnChunkMetaData(_Weakrefable): self.path_in_schema, self.is_stats_set, statistics, + geospatial_statistics, self.compression, self.encodings, self.has_dictionary_page, @@ -361,6 +452,11 @@ cdef class ColumnChunkMetaData(_Weakrefable): Dictionary with a key for each attribute of this class. """ statistics = self.statistics.to_dict() if self.is_stats_set else None + if self.is_geometry_stats_set: + geospatial_statistics = self.geospatial_statistics.to_dict() + else: + geospatial_statistics = None + d = dict( file_offset=self.file_offset, file_path=self.file_path, @@ -369,6 +465,7 @@ cdef class ColumnChunkMetaData(_Weakrefable): path_in_schema=self.path_in_schema, is_stats_set=self.is_stats_set, statistics=statistics, + geospatial_statistics=geospatial_statistics, compression=self.compression, encodings=self.encodings, has_dictionary_page=self.has_dictionary_page, @@ -440,6 +537,20 @@ cdef class ColumnChunkMetaData(_Weakrefable): statistics.init(self.metadata.statistics(), self) return statistics + @property + def is_geometry_stats_set(self): + """Whether or not geometry statistics are present in metadata (bool).""" + return self.metadata.is_geometry_stats_set() + + @property + def geospatial_statistics(self): + """Statistics for column chunk (:class:`GeospatialStatistics`).""" + if not self.metadata.is_geometry_stats_set(): + return None + geospatial_statistics = GeospatialStatistics() + 
geospatial_statistics.init(self.metadata.geometry_statistics(), self) + return geospatial_statistics + @property def compression(self): """ @@ -1441,7 +1552,8 @@ cdef class ParquetReader(_Weakrefable): FileDecryptionProperties decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None, - page_checksum_verification=False): + page_checksum_verification=False, + arrow_extensions_enabled=False): """ Open a parquet file for reading. @@ -1458,6 +1570,7 @@ cdef class ParquetReader(_Weakrefable): thrift_string_size_limit : int, optional thrift_container_size_limit : int, optional page_checksum_verification : bool, default False + arrow_extensions_enabled : bool, default False """ cdef: shared_ptr[CFileMetaData] c_metadata @@ -1507,6 +1620,8 @@ cdef class ParquetReader(_Weakrefable): arrow_props.set_coerce_int96_timestamp_unit( string_to_timeunit(coerce_int96_timestamp_unit)) + arrow_props.set_arrow_extensions_enabled(arrow_extensions_enabled) + self.source = source get_reader(source, use_memory_map, &self.rd_handle) @@ -2036,7 +2151,8 @@ cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties( allow_truncated_timestamps=False, writer_engine_version=None, use_compliant_nested_type=True, - store_schema=True) except *: + store_schema=True, + write_geospatial_logical_types=False) except *: """Arrow writer properties""" cdef: shared_ptr[ArrowWriterProperties] arrow_properties @@ -2087,6 +2203,11 @@ cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties( raise ValueError("Unsupported Writer Engine Version: {0}" .format(writer_engine_version)) + # write_geospatial_logical_types + + if write_geospatial_logical_types: + arrow_props.write_geospatial_logical_types() + arrow_properties = arrow_props.build() return arrow_properties @@ -2169,7 +2290,8 @@ cdef class ParquetWriter(_Weakrefable): write_page_index=False, write_page_checksum=False, sorting_columns=None, - store_decimal_as_integer=False): + 
store_decimal_as_integer=False, + write_geospatial_logical_types=False): cdef: shared_ptr[WriterProperties] properties shared_ptr[ArrowWriterProperties] arrow_properties @@ -2212,6 +2334,7 @@ cdef class ParquetWriter(_Weakrefable): writer_engine_version=writer_engine_version, use_compliant_nested_type=use_compliant_nested_type, store_schema=store_schema, + write_geospatial_logical_types=write_geospatial_logical_types, ) pool = maybe_unbox_memory_pool(memory_pool) diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index 6ca6f7089e75c..43fc671acb0a8 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -255,6 +255,9 @@ class ParquetFile: it will be parsed as an URI to determine the filesystem. page_checksum_verification : bool, default False If True, verify the checksum for each page read from the file. + arrow_extensions_enabled : bool, default False + If True, read Parquet logical types as Arrow Extension Types where possible, + (e.g., JSON arrow.json). Examples -------- @@ -303,7 +306,7 @@ def __init__(self, source, *, metadata=None, common_metadata=None, pre_buffer=False, coerce_int96_timestamp_unit=None, decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None, filesystem=None, - page_checksum_verification=False): + page_checksum_verification=False, arrow_extensions_enabled=False): self._close_source = getattr(source, 'closed', True) @@ -323,6 +326,7 @@ def __init__(self, source, *, metadata=None, common_metadata=None, thrift_string_size_limit=thrift_string_size_limit, thrift_container_size_limit=thrift_container_size_limit, page_checksum_verification=page_checksum_verification, + arrow_extensions_enabled=arrow_extensions_enabled, ) self.common_metadata = common_metadata self._nested_paths_by_prefix = self._build_nested_paths() @@ -890,6 +894,9 @@ def _sanitize_table(table, new_schema, flavor): - fixed_len_byte_array: for precision > 18. 
As a consequence, decimal columns stored in integer types are more compact. +write_geospatial_logical_types : bool, default False + Write GEOMETRY and/or GEOGRAPHY logical types when converting GeoArrow types + to Parquet. """ _parquet_writer_example_doc = """\ @@ -986,6 +993,7 @@ def __init__(self, where, schema, filesystem=None, write_page_checksum=False, sorting_columns=None, store_decimal_as_integer=False, + write_geospatial_logical_types=False, **options): if use_deprecated_int96_timestamps is None: # Use int96 timestamps for Spark @@ -1039,6 +1047,7 @@ def __init__(self, where, schema, filesystem=None, write_page_checksum=write_page_checksum, sorting_columns=sorting_columns, store_decimal_as_integer=store_decimal_as_integer, + write_geospatial_logical_types=write_geospatial_logical_types, **options) self.is_open = True @@ -1265,6 +1274,9 @@ class ParquetDataset: sufficient for most Parquet files. page_checksum_verification : bool, default False If True, verify the page checksum for each page read from the file. +arrow_extensions_enabled : bool, default False + If True, read Parquet logical types as Arrow Extension Types where possible, + (e.g., JSON arrow.json). use_legacy_dataset : bool, optional Deprecated and has no effect from PyArrow version 15.0.0. 
@@ -1280,6 +1292,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None, decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None, page_checksum_verification=False, + arrow_extensions_enabled=False, use_legacy_dataset=None): if use_legacy_dataset is not None: @@ -1297,6 +1310,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None, "thrift_string_size_limit": thrift_string_size_limit, "thrift_container_size_limit": thrift_container_size_limit, "page_checksum_verification": page_checksum_verification, + "arrow_extensions_enabled": arrow_extensions_enabled, } if buffer_size: read_options.update(use_buffered_stream=True, @@ -1686,6 +1700,9 @@ def partitioning(self): sufficient for most Parquet files. page_checksum_verification : bool, default False If True, verify the checksum for each page read from the file. +arrow_extensions_enabled : bool, default False + If True, read Parquet logical types as Arrow Extension Types where possible + (e.g., Parquet JSON is read as the arrow.json extension type).
Returns ------- @@ -1781,7 +1798,8 @@ def read_table(source, *, columns=None, use_threads=True, coerce_int96_timestamp_unit=None, decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None, - page_checksum_verification=False): + page_checksum_verification=False, + arrow_extensions_enabled=False): if use_legacy_dataset is not None: warnings.warn( @@ -1806,6 +1824,7 @@ def read_table(source, *, columns=None, use_threads=True, thrift_string_size_limit=thrift_string_size_limit, thrift_container_size_limit=thrift_container_size_limit, page_checksum_verification=page_checksum_verification, + arrow_extensions_enabled=arrow_extensions_enabled, ) except ImportError: # fall back on ParquetFile for simple cases when pyarrow.dataset @@ -1893,6 +1912,7 @@ def write_table(table, where, row_group_size=None, version='2.6', write_page_checksum=False, sorting_columns=None, store_decimal_as_integer=False, + write_geospatial_logical_types=False, **kwargs): # Implementor's note: when adding keywords here / updating defaults, also # update it in write_to_dataset and _dataset_parquet.pyx ParquetFileWriteOptions @@ -1924,6 +1944,7 @@ def write_table(table, where, row_group_size=None, version='2.6', write_page_checksum=write_page_checksum, sorting_columns=sorting_columns, store_decimal_as_integer=store_decimal_as_integer, + write_geospatial_logical_types=write_geospatial_logical_types, **kwargs) as writer: writer.write_table(table, row_group_size=row_group_size) except Exception: