From 666c49a7a259dd0f2124a53bc71fd13789bc2489 Mon Sep 17 00:00:00 2001 From: Ildar Musin Date: Mon, 21 Dec 2020 17:18:02 +0100 Subject: [PATCH] Bugfix: rowgroup filtering by float and double attributes (#28) --- data/example1.parquet | Bin 6813 -> 7220 bytes data/example2.parquet | Bin 4594 -> 4763 bytes data/generate.py | 2 +- input/parquet_fdw.source | 6 ++++- output/import.source | 4 +-- output/parquet_fdw.source | 54 ++++++++++++++++++++++++++++++-------- parquet_impl.cpp | 4 +-- 7 files changed, 53 insertions(+), 17 deletions(-) diff --git a/data/example1.parquet b/data/example1.parquet index c6c1f2c557cffadc015a60035f792ceba521b709..6388e22a878c6a221d3163b90bc20d1c2f29dd35 100644 GIT binary patch delta 1996 zcmbtU&2JM&6rUY?Z3pZon5@~@PH;Y$S`@|kAk?X7$!we$qd3W$MzL4w!5=^z@?jBN zmxweRdZ-Ykv{O~8L@N$GAw-d&?IrvJsLG+L5QiQ(aOkl@J#eT5^Jbl5?V6&hWBI+A zH*bFP-fw>PwHJ*rBQD!~!qQAe0J zh!o7qo+W(8&(gk-d}9xJ+{TuD_9{Mep2OP;ljIJ0tDQXE%l6Cmq!aQt_@@r0OP#wP z;=oNzF016qKIU`TQ{%2Vd^60+3ibRJBAk{N5MdIop{3 z3KXkwdRm^eTVnhU(p0BPlf;)YjlN!{fNWK1Gtf&yrAPj&N&(&!Y2?)+g(Ry+57-+E zJlGX*ktY)O?ff|}602mJ)!kgN%`#2FMqe*jDA%gjW`I_?Hp?;98u?eX)`j)K{}mRm zJMai7+*Y`6$NYz=x+?jVQhrVS`RF`su{=dp9E>qAN$_j=ioqw!0snwT0a~?fkcPyx@xBKdv%+<;Uwy zLHF|xe!BzPT94raca1%G53}m~%}-r03( z=`=m0Z`0s$DLQ*Yt^w*d|<2uvzN|P z3-*Fl%+05Y)BUFgGE-l!X49uCcBx#jOYzG1@ZhSw&_`IX3L=a)YOCTb79^-~j51iE zU{h*MQ#wHbPKy;AH8zg^GtF*U&>)b=EavBFD8QMdz*7Iy zY3apVnD3J1cFCnF_$ePa`KX*-nM&Aw`*^}VJBoUHV`9$$rCQY-3FN>ejkyu;` zk_6-%fz&1qS`$CSNm^*X0YFg#oDL2)?&}6h96)v|HJ2l}Bo2@t%`cPff{hJuo}N(l qX$+w{3uYyN2sQufiEuLVFOVwSOxB diff --git a/data/example2.parquet b/data/example2.parquet index 589ae651c209e7821f89cb6fb724e0d2ad1cceb2..004c048b2466d872218d8fd1484e6df2831ec75d 100644 GIT binary patch delta 1087 zcma)3O>7cT5PrKX*pf;yDP-H?U#+Au6$%z9jZK_|LNQQ&lmL693x%zvAvTC@VlYNM zdCDJ@@SVlaGVLlMadf zMB*TLJl%ONe4{_4acNs}zoU!~cV)PUI zcR$R_+39y$-5r6QLxfFNwlqp><Z-Q3*jVyU>nR=d=QTIab?H^JpF^BEIpr z;1;iFfm3~kbkpMA-lT7^&<0o1OTJ?3@8~0 zvv~k$5J6)@9dFDd&RGWvGU7PhSg5IHZ~`slI~WG%z!#ImH8)4!o|x?$vk{VptnUvw CF9E;+ delta 935 zcma)3O>7cD6y9B?TZJOgSh7utN&VS0F~EYhmd2>FTUfTT5wKX;lgQ5k7HX<=6B2_V zG4ZIKi-|p&UOn{Y+4yty;LpW))T;+i9-KD|woNo~4)4u--@Ny}_sz32KTbV<^W@_; z|B6^+r_oa+Z;@?2Og{59{w29AgpPk1mw$?(d2SZS>{OxeL@^3AEW~r#wPJMHXKF&i;PDe9HcTvEC0#lyBOl%?TX_&9;8HK z^W=qh`h*fWck(_bP2ha~+UnxnN~6{;OAi83ICD1n=A99+Lwvp~{-qLjFjg@==Iz~v z?w1wQ;MRZ&YH5xK&*4l~)s8qqS}utz_5K&i9Ot0<}7rik|9P zda5^yO`N1%0nrLzi|k6rVE`~bo4pW?LYONtd&_V5$i#ATPn1(sK z1h`wU_g>kA_kg$qtoB;mVz6q!>NKxvO@_09Y;6{TDPav3>C=mQOjj4(+N>50ma6|D zP}s+Kk3fv!0I5njy}ZVTUtOW27icbNu0R)+y=Mwwb{#2_WApmhY>f)EG!(!O4L5=d z&W!M3E7nE5Q`K+WV>r`XRjTm?kb$zXu}Z}o_Hn1Mhe>T2{C+|hhiS$JY3a%pkYspL Zt_Lo9qU*|vF)1BeJoMhraao*?{RX0n`d9z} diff --git a/data/generate.py b/data/generate.py index d25d4c9..b23bdc9 100755 --- a/data/generate.py +++ b/data/generate.py @@ -32,7 +32,7 @@ date(2018, 1, 5), date(2018, 1, 6)], 'six': [False, False, False], - 'seven': [0.5, None, 1.0]}) + 'seven': [1.5, None, 2.0]}) table2 = pa.Table.from_pandas(df2) with pq.ParquetWriter('example1.parquet', table1.schema) as writer: diff --git a/input/parquet_fdw.source b/input/parquet_fdw.source index 6d6375a..bfb07aa 100644 --- a/input/parquet_fdw.source +++ b/input/parquet_fdw.source @@ -42,7 +42,11 @@ SELECT * FROM example1 WHERE one = 2; SELECT * FROM example1 WHERE one = 7; SELECT * FROM example1 WHERE six = true; SELECT * FROM example1 WHERE six = false; -SELECT * FROM example1 WHERE seven < 0.9; +SELECT * FROM example1 WHERE seven < 1.5; +SELECT * FROM example1 WHERE seven <= 1.5; +SELECT * FROM example1 WHERE seven = 1.5; +SELECT * FROM example1 WHERE seven > 1; +SELECT * FROM example1 WHERE seven >= 1; SELECT * FROM example1 WHERE seven IS NULL; -- prepared statements diff --git a/output/import.source b/output/import.source index 2431ee4..7bc5bb4 100644 --- a/output/import.source +++ b/output/import.source @@ -51,10 +51,10 @@ SELECT * FROM example_import ORDER BY one, three; 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 3 | {21,22} | zwei | 2018-01-03 00:00:00 | 2018-01-03 | f | - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 1.5 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | 5 | {23,24} | drei | 2018-01-05 00:00:00 | 2018-01-05 | t | - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 2 7 | {25,26} | vier | 2018-01-07 00:00:00 | 2018-01-07 | f | 9 | {27,28} | fünf | 2018-01-09 00:00:00 | 2018-01-09 | t | (11 rows) diff --git a/output/parquet_fdw.source b/output/parquet_fdw.source index 00512a8..b94b6eb 100644 --- a/output/parquet_fdw.source +++ b/output/parquet_fdw.source @@ -24,9 +24,9 @@ SELECT * FROM example1; 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 1.5 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 2 (6 rows) -- no explicit columns mentions @@ -93,7 +93,7 @@ SELECT * FROM example1 WHERE one >= 6; DEBUG: parquet_fdw: skip rowgroup 1 one | two | three | four | five | six | seven -----+------------+-------+---------------------+------------+-----+------- - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 2 (1 row) SELECT * FROM example1 WHERE one = 2; @@ -122,18 +122,50 @@ SELECT * FROM example1 WHERE six = false; one | two | three | four | five | six | seven -----+------------+-------+---------------------+------------+-----+------- 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 1.5 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 2 (4 rows) -SELECT * FROM example1 WHERE seven < 0.9; +SELECT * FROM example1 WHERE seven < 1.5; +DEBUG: parquet_fdw: skip rowgroup 2 + one | two | three | four | five | six | seven +-----+---------+-------+---------------------+------------+-----+------- + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 + 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 +(2 rows) + +SELECT * FROM example1 WHERE seven <= 1.5; one | two | three | four | five | six | seven -----+------------+-------+---------------------+------------+-----+------- 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 + 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 1.5 +(3 rows) + +SELECT * FROM example1 WHERE seven = 1.5; +DEBUG: parquet_fdw: skip rowgroup 1 + one | two | three | four | five | six | seven +-----+------------+-------+---------------------+------------+-----+------- + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 1.5 +(1 row) + +SELECT * FROM example1 WHERE seven > 1; +DEBUG: parquet_fdw: skip rowgroup 1 + one | two | three | four | five | six | seven +-----+------------+-------+---------------------+------------+-----+------- + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 1.5 + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 2 (2 rows) +SELECT * FROM example1 WHERE seven >= 1; + one | two | three | four | five | six | seven +-----+------------+-------+---------------------+------------+-----+------- + 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 1.5 + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 2 +(3 rows) + SELECT * FROM example1 WHERE seven IS NULL; one | two | three | four | five | six | seven -----+------------+-------+---------------------+------------+-----+------- @@ -261,9 +293,9 @@ SELECT * FROM example_seq; 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 1.5 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 2 1 | {19,20} | eins | 2018-01-01 00:00:00 | 2018-01-01 | t | 3 | {21,22} | zwei | 2018-01-03 00:00:00 | 2018-01-03 | f | 5 | {23,24} | drei | 2018-01-05 00:00:00 | 2018-01-05 | t | @@ -300,10 +332,10 @@ SELECT * FROM example_sorted ORDER BY one; 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | 3 | {21,22} | zwei | 2018-01-03 00:00:00 | 2018-01-03 | f | 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 1.5 5 | {23,24} | drei | 2018-01-05 00:00:00 | 2018-01-05 | t | 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 2 7 | {25,26} | vier | 2018-01-07 00:00:00 | 2018-01-07 | f | 9 | {27,28} | fünf | 2018-01-09 00:00:00 | 2018-01-09 | t | (11 rows) diff --git a/parquet_impl.cpp b/parquet_impl.cpp index bed8ddf..b5ef589 100644 --- a/parquet_impl.cpp +++ b/parquet_impl.cpp @@ -2798,9 +2798,9 @@ bytes_to_postgres_type(const char *bytes, arrow::DataType *arrow_type) case arrow::Type::INT64: return Int64GetDatum(*(int64 *) bytes); case arrow::Type::FLOAT: - return Int32GetDatum(*(float *) bytes); + return Float4GetDatum(*(float *) bytes); case arrow::Type::DOUBLE: - return Int64GetDatum(*(double *) bytes); + return Float8GetDatum(*(double *) bytes); case arrow::Type::STRING: case arrow::Type::BINARY: return CStringGetTextDatum(bytes);