Skip to content

Commit

Permalink
merge with main, fix up tests for hugeint decimals
Browse files Browse the repository at this point in the history
  • Loading branch information
Tishj committed May 17, 2024
2 parents 5e7513f + 46151ff commit 1255e15
Show file tree
Hide file tree
Showing 24 changed files with 1,024 additions and 236 deletions.
17 changes: 10 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
.PHONY: duckdb install_duckdb clean_duckdb lintcheck
.PHONY: duckdb install_duckdb clean_duckdb lintcheck .depend

MODULE_big = quack
EXTENSION = quack
DATA = quack.control $(wildcard quack--*.sql)

SRCS = src/quack_heap_seq_scan.cpp \
SRCS = src/quack_detoast.cpp \
src/quack_filter.cpp \
src/quack_heap_scan.cpp \
src/quack_heap_seq_scan.cpp \
src/quack_hooks.cpp \
src/quack_select.cpp \
src/quack_types.cpp \
src/quack_memory_allocator.cpp \
src/quack_node.cpp \
src/quack_planner.cpp \
src/quack_types.cpp \
src/quack.cpp

OBJS = $(subst .cpp,.o, $(SRCS))
Expand All @@ -35,7 +38,7 @@ else
QUACK_BUILD_DUCKDB = release
endif

override PG_CPPFLAGS += -Iinclude -Ithird_party/duckdb/src/include -std=c++17 ${QUACK_BUILD_CXX_FLAGS}
override PG_CPPFLAGS += -Iinclude -Ithird_party/duckdb/src/include -std=c++17 -Wno-sign-compare ${QUACK_BUILD_CXX_FLAGS}

SHLIB_LINK += -Wl,-rpath,$(PG_LIB)/ -lpq -L$(PG_LIB) -lduckdb -Lthird_party/duckdb/build/$(QUACK_BUILD_DUCKDB)/src -lstdc++

Expand All @@ -53,7 +56,7 @@ ifeq ($(UNAME_S),Linux)
DUCKDB_LIB = libduckdb.so
endif

all: duckdb $(OBJS)
all: duckdb $(OBJS) .depend

include $(PGXS)

Expand All @@ -63,7 +66,7 @@ third_party/duckdb/Makefile:
git submodule update --init --recursive

third_party/duckdb/build/$(QUACK_BUILD_DUCKDB)/src/$(DUCKDB_LIB):
$(MAKE) -C third_party/duckdb $(QUACK_BUILD_DUCKDB) DISABLE_SANITIZER=1 ENABLE_UBSAN=0 BUILD_UNITTESTS=OFF CMAKE_EXPORT_COMPILE_COMMANDS=1
$(MAKE) -C third_party/duckdb $(QUACK_BUILD_DUCKDB) DISABLE_SANITIZER=1 ENABLE_UBSAN=0 BUILD_UNITTESTS=OFF BUILD_HTTPFS=1 CMAKE_EXPORT_COMPILE_COMMANDS=1

install_duckdb:
$(install_bin) -m 755 third_party/duckdb/build/$(QUACK_BUILD_DUCKDB)/src/$(DUCKDB_LIB) $(DESTDIR)$(PG_LIB)
Expand Down
40 changes: 22 additions & 18 deletions expected/basic.out
Original file line number Diff line number Diff line change
Expand Up @@ -4,41 +4,45 @@ INSERT INTO t SELECT g % 10 from generate_series(1,1000000) g;
SET client_min_messages to 'DEBUG3';
SELECT COUNT(*) FROM t;
DEBUG: -- (DuckDB/PostgresHeapBind) Column name: a, Type: INTEGER --
DEBUG: -- (DuckDB/PostgresHeapBind) Column name: a, Type: INTEGER --
DEBUG: -- (DuckDB/PostgresHeapScanGlobalState) Running 1 threads --
count
---------
1000000
count_star()
--------------
1000000
(1 row)

SELECT a, COUNT(*) FROM t WHERE a > 5 GROUP BY a ORDER BY a;
DEBUG: -- (DuckDB/PostgresHeapBind) Column name: a, Type: INTEGER --
DEBUG: -- (DuckDB/PostgresHeapBind) Column name: a, Type: INTEGER --
DEBUG: -- (DuckDB/PostgresHeapScanGlobalState) Running 1 threads --
a | count
---+--------
6 | 100000
7 | 100000
8 | 100000
9 | 100000
a | count_star()
---+--------------
6 | 100000
7 | 100000
8 | 100000
9 | 100000
(4 rows)

SET quack.max_threads_per_query to 4;
SELECT COUNT(*) FROM t;
DEBUG: -- (DuckDB/PostgresHeapBind) Column name: a, Type: INTEGER --
DEBUG: -- (DuckDB/PostgresHeapBind) Column name: a, Type: INTEGER --
DEBUG: -- (DuckDB/PostgresHeapScanGlobalState) Running 4 threads --
count
---------
1000000
count_star()
--------------
1000000
(1 row)

SELECT a, COUNT(*) FROM t WHERE a > 5 GROUP BY a ORDER BY a;
DEBUG: -- (DuckDB/PostgresHeapBind) Column name: a, Type: INTEGER --
DEBUG: -- (DuckDB/PostgresHeapBind) Column name: a, Type: INTEGER --
DEBUG: -- (DuckDB/PostgresHeapScanGlobalState) Running 4 threads --
a | count
---+--------
6 | 100000
7 | 100000
8 | 100000
9 | 100000
a | count_star()
---+--------------
6 | 100000
7 | 100000
8 | 100000
9 | 100000
(4 rows)

SET quack.max_threads_per_query TO default;
Expand Down
2 changes: 2 additions & 0 deletions include/quack/quack.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
#pragma once

// quack.cpp
extern bool quack_execution;
extern int quack_max_threads_per_query;
extern char *quack_secret;
extern "C" void _PG_init(void);

// quack_hooks.cpp
Expand Down
15 changes: 15 additions & 0 deletions include/quack/quack_detoast.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#pragma once

#include "duckdb.hpp"

extern "C" {
#include "postgres.h"
}

#include <mutex>

namespace quack {

// Declaration only; the definition is not visible in this header.
// NOTE(review): judging by the name and signature, this presumably returns a
// detoasted form of `value`, uses `lock` to serialize the underlying Postgres
// calls, and reports through `*shouldFree` whether the caller must release the
// returned datum — confirm against src/quack_detoast.cpp.
Datum DetoastPostgresDatum(struct varlena *value, std::mutex &lock, bool *shouldFree);

} // namespace quack
13 changes: 13 additions & 0 deletions include/quack/quack_filter.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#pragma once

#include "duckdb.hpp"

extern "C" {
#include "postgres.h"
}

namespace quack {

// Declaration only; the definition is not visible in this header.
// NOTE(review): presumably evaluates the DuckDB table filter `filter` against a
// single Postgres datum whose type is `typeOid` (with `isNull` flagging SQL
// NULL) and returns true when the value passes — confirm against
// src/quack_filter.cpp.
bool ApplyValueFilter(duckdb::TableFilter &filter, Datum &value, bool isNull, Oid typeOid);

} // namespace quack
12 changes: 6 additions & 6 deletions include/quack/quack_heap_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ extern "C" {

// Postgres Relation


namespace quack {

struct PostgresHeapScanLocalState : public duckdb::LocalTableFunctionState {
Expand All @@ -23,15 +22,15 @@ struct PostgresHeapScanLocalState : public duckdb::LocalTableFunctionState {
~PostgresHeapScanLocalState() override;

public:
PostgresHeapSeqScan & m_rel;
PostgresHeapSeqScan &m_rel;
PostgresHeapSeqScanThreadInfo m_thread_seq_scan_info;
bool m_exhausted_scan = false;
};

// Global State

struct PostgresHeapScanGlobalState : public duckdb::GlobalTableFunctionState {
explicit PostgresHeapScanGlobalState(PostgresHeapSeqScan &relation);
explicit PostgresHeapScanGlobalState(PostgresHeapSeqScan &relation, duckdb::TableFunctionInitInput &input);
~PostgresHeapScanGlobalState();
idx_t
MaxThreads() const override {
Expand Down Expand Up @@ -67,7 +66,7 @@ struct PostgresHeapScanFunction : public duckdb::TableFunction {
// LocalTableFunctionState *lstate, GlobalTableFunctionState *gstate); static double PostgresProgress(ClientContext
// &context, const FunctionData *bind_data_p, const GlobalTableFunctionState *gstate);
static void PostgresHeapScanFunc(duckdb::ClientContext &context, duckdb::TableFunctionInput &data_p,
duckdb::DataChunk &output);
duckdb::DataChunk &output);
// static unique_ptr<NodeStatistics> PostgresCardinality(ClientContext &context, const FunctionData *bind_data);
// static idx_t PostgresGetBatchIndex(ClientContext &context, const FunctionData *bind_data_p,
// LocalTableFunctionState *local_state, GlobalTableFunctionState *global_state); static void
Expand All @@ -79,12 +78,13 @@ struct PostgresHeapScanFunction : public duckdb::TableFunction {

struct PostgresHeapReplacementScanData : public duckdb::ReplacementScanData {
public:
PostgresHeapReplacementScanData(QueryDesc *desc) : desc(desc) {
PostgresHeapReplacementScanData(Query *parse, const char *query) : m_parse(parse), m_query(query) {
}
~PostgresHeapReplacementScanData() override {};

public:
QueryDesc *desc;
Query *m_parse;
std::string m_query;
};

duckdb::unique_ptr<duckdb::TableRef> PostgresHeapReplacementScan(duckdb::ClientContext &context,
Expand Down
45 changes: 33 additions & 12 deletions include/quack/quack_heap_seq_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@ extern "C" {
#include "postgres.h"
#include "access/tableam.h"
#include "access/heapam.h"
#include "storage/bufmgr.h"
}

#include <mutex>
#include <atomic>

namespace quack {

Expand All @@ -31,18 +33,35 @@ class PostgresHeapSeqScanThreadInfo {
HeapTupleData m_tuple;
};

class PostgresHeapSeqScan {
class PostgresHeapSeqParallelScanState {
private:
class ParallelScanState {
public:
ParallelScanState() : m_nblocks(InvalidBlockNumber), m_last_assigned_block_number(InvalidBlockNumber) {
}
BlockNumber AssignNextBlockNumber();
std::mutex m_lock;
BlockNumber m_nblocks;
BlockNumber m_last_assigned_block_number;
};
static int const k_max_prefetch_block_number = 32;

public:
PostgresHeapSeqParallelScanState()
: m_nblocks(InvalidBlockNumber), m_last_assigned_block_number(InvalidBlockNumber), m_count_tuples_only(false),
m_total_row_count(0), m_last_prefetch_block(0), m_strategy(nullptr) {
}
~PostgresHeapSeqParallelScanState() {
if (m_strategy)
pfree(m_strategy);
}
BlockNumber AssignNextBlockNumber();
void PrefetchNextRelationPages(Relation rel);
std::mutex m_lock;
BlockNumber m_nblocks;
BlockNumber m_last_assigned_block_number;
bool m_count_tuples_only;
duckdb::map<duckdb::idx_t, duckdb::idx_t> m_columns;
duckdb::map<duckdb::idx_t, duckdb::idx_t> m_projections;
duckdb::TableFilterSet *m_filters = nullptr;
std::atomic<std::uint32_t> m_total_row_count;
BlockNumber m_last_prefetch_block;
BufferAccessStrategy m_strategy;
};

class PostgresHeapSeqScan {
private:
public:
PostgresHeapSeqScan(RangeTblEntry *table);
~PostgresHeapSeqScan();
Expand All @@ -52,14 +71,15 @@ class PostgresHeapSeqScan {
PostgresHeapSeqScan(PostgresHeapSeqScan &&other);

public:
void InitParallelScanState();
void InitParallelScanState( duckdb::TableFunctionInitInput &input);
void
SetSnapshot(Snapshot snapshot) {
m_snapshot = snapshot;
}

public:
Relation GetRelation();
void CloseRelation();
TupleDesc GetTupleDesc();
bool ReadPageTuples(duckdb::DataChunk &output, PostgresHeapSeqScanThreadInfo &threadScanInfo);
bool IsValid() const;
Expand All @@ -68,9 +88,10 @@ class PostgresHeapSeqScan {
Page PreparePageRead(PostgresHeapSeqScanThreadInfo &threadScanInfo);

private:
RangeTblEntry * m_tableEntry = nullptr;
Relation m_rel = nullptr;
Snapshot m_snapshot = nullptr;
ParallelScanState m_parallel_scan_state;
PostgresHeapSeqParallelScanState m_parallel_scan_state;
};

} // namespace quack
9 changes: 9 additions & 0 deletions include/quack/quack_node.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#pragma once

extern "C" {
#include "postgres.h"
#include "nodes/extensible.h"
}

extern CustomScanMethods quack_scan_scan_methods;
extern "C" void quack_init_node(void);
8 changes: 8 additions & 0 deletions include/quack/quack_planner.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#pragma once

extern "C" {
#include "postgres.h"
#include "optimizer/planner.h"
}

PlannedStmt *quack_plan_node(Query *parse, const char *query_string, int cursorOptions, ParamListInfo boundParams);
9 changes: 0 additions & 9 deletions include/quack/quack_select.h

This file was deleted.

12 changes: 11 additions & 1 deletion include/quack/quack_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,19 @@ extern "C" {
#include "executor/tuptable.h"
}

#include "quack/quack_heap_seq_scan.hpp"

namespace quack {

// DuckDB's date/timestamp epoch is 1970-01-01 while PostgreSQL's is 2000-01-01;
// the constants below are the distance between the two epochs (10957 days).
constexpr int32_t QUACK_DUCK_DATE_OFFSET = 10957;
constexpr int64_t QUACK_DUCK_TIMESTAMP_OFFSET = INT64CONST(10957) * USECS_PER_DAY;

duckdb::LogicalType ConvertPostgresToDuckColumnType(Oid type, int32_t typmod);
Oid GetPostgresDuckDBType(duckdb::LogicalTypeId type);
void ConvertPostgresToDuckValue(Datum value, duckdb::Vector &result, idx_t offset);
void ConvertDuckToPostgresValue(TupleTableSlot *slot, duckdb::Value &value, idx_t col);
void InsertTupleIntoChunk(duckdb::DataChunk &output, TupleDesc tuple, HeapTupleData *slot, idx_t offset);
void InsertTupleIntoChunk(duckdb::DataChunk &output, PostgresHeapSeqScanThreadInfo &threadScanInfo,
PostgresHeapSeqParallelScanState &parallelScanState);

} // namespace quack
24 changes: 24 additions & 0 deletions include/quack/quack_utils.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#pragma once

#include <vector>
#include <string>
#include <sstream>

#include <cstdio>
#include <cstdarg>
#include <cstring>

namespace quack {

/// Splits a C string into tokens separated by `delimiter`.
///
/// Tokenization follows std::getline semantics: consecutive delimiters yield
/// empty tokens, while a single trailing delimiter does not produce a trailing
/// empty token (e.g. "a,,b," -> {"a", "", "b"}).
///
/// @param str        NUL-terminated input; a null pointer is treated as empty input.
/// @param delimiter  Character that separates tokens.
/// @return The tokens in order of appearance; empty when `str` is null or "".
inline std::vector<std::string>
tokenizeString(char *str, const char delimiter) {
	std::vector<std::string> tokens;
	// Building a std::string from a null pointer is undefined behaviour, so
	// bail out before handing the pointer to the stream.
	if (str == nullptr) {
		return tokens;
	}
	std::istringstream stream(str);
	std::string token;
	while (std::getline(stream, token, delimiter)) {
		tokens.push_back(token);
	}
	return tokens;
}

} // namespace quack
8 changes: 8 additions & 0 deletions quack--0.0.1.sql
Original file line number Diff line number Diff line change
@@ -1 +1,9 @@
LOAD 'quack';

-- read_parquet(path text): declared to return SETOF record.
-- The PL/pgSQL body ignores `path` and simply runs the constant query
-- 'SELECT 1'.
-- NOTE(review): this looks like a placeholder so that the function name
-- resolves on the Postgres side, with the real work presumably done elsewhere
-- in the extension — confirm before relying on its output.
CREATE OR REPLACE FUNCTION read_parquet(path text)
RETURNS SETOF record LANGUAGE 'plpgsql' AS
$func$
BEGIN
RETURN QUERY EXECUTE 'SELECT 1';
END;
$func$;
Loading

0 comments on commit 1255e15

Please sign in to comment.