Skip to content

Commit

Permalink
Add support for scanning views (#44)
Browse files Browse the repository at this point in the history
First we took the `parse->rtable` that Postgres has provided to us after
parsing the query, then we looked through this list in the replacement
scan, we returned this RangeTblEntry and used it for the Oid throughout
execution of the scan.

The only benefit that the RangeTblEntry gave us was that it contained an
alias for where it appeared in the query, but we don't need that, DuckDB
already provides us the alias.

Internal details:
Inside the replacement scan we find the Oid given the name
(schema+table), from there we check if this is a VIEW or not.
If it is, we look up the views definition and return a SubqueryRef that
will get bound later, essentially rewriting:
`select * from vw` into `select * from (select * from tbl)`
  • Loading branch information
Tishj authored Jun 18, 2024
1 parent d8b024f commit 726db29
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 62 deletions.
4 changes: 2 additions & 2 deletions include/quack/quack_heap_seq_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ class PostgresHeapSeqParallelScanState {
class PostgresHeapSeqScan {
private:
public:
PostgresHeapSeqScan(RangeTblEntry *table);
PostgresHeapSeqScan(Oid relid);
~PostgresHeapSeqScan();
PostgresHeapSeqScan(const PostgresHeapSeqScan &other) = delete;
PostgresHeapSeqScan &operator=(const PostgresHeapSeqScan &other) = delete;
Expand All @@ -90,7 +90,7 @@ class PostgresHeapSeqScan {
Page PreparePageRead(PostgresHeapSeqScanThreadInfo &threadScanInfo);

private:
RangeTblEntry * m_table = nullptr;
Oid m_relid = InvalidOid;
Relation m_rel = nullptr;
Snapshot m_snapshot = nullptr;
PostgresHeapSeqParallelScanState m_parallel_scan_state;
Expand Down
140 changes: 87 additions & 53 deletions src/quack_heap_scan.cpp
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
#include "duckdb/main/client_context.hpp"
#include "duckdb/function/replacement_scan.hpp"
#include "duckdb/parser/tableref/table_function_ref.hpp"
#include "duckdb/parser/parser.hpp"
#include "duckdb/parser/tableref/subqueryref.hpp"
#include "duckdb/parser/expression/function_expression.hpp"
#include "duckdb/parser/statement/select_statement.hpp"
#include "duckdb/parser/expression/constant_expression.hpp"
#include "duckdb/parser/expression/comparison_expression.hpp"
#include "duckdb/parser/expression/columnref_expression.hpp"
#include "duckdb/parser/qualified_name.hpp"
#include "duckdb/common/enums/statement_type.hpp"
#include "duckdb/common/enums/expression_type.hpp"

extern "C" {
#include "postgres.h"
#include "catalog/namespace.h"
#include "utils/regproc.h"
#include "utils/syscache.h"
#include "utils/builtins.h"
}

#include "quack/quack_heap_scan.hpp"
Expand All @@ -24,7 +30,7 @@ namespace quack {
//

PostgresHeapScanFunctionData::PostgresHeapScanFunctionData(PostgresHeapSeqScan &&relation, Snapshot snapshot)
: m_relation(std::move(relation)) {
: m_relation(std::move(relation)) {
m_relation.SetSnapshot(snapshot);
}

Expand All @@ -36,7 +42,7 @@ PostgresHeapScanFunctionData::~PostgresHeapScanFunctionData() {
//

PostgresHeapScanGlobalState::PostgresHeapScanGlobalState(PostgresHeapSeqScan &relation,
duckdb::TableFunctionInitInput &input) {
duckdb::TableFunctionInitInput &input) {
elog(DEBUG3, "-- (DuckDB/PostgresHeapScanGlobalState) Running %llu threads -- ", MaxThreads());
relation.InitParallelScanState(input);
}
Expand All @@ -62,9 +68,9 @@ PostgresHeapScanLocalState::~PostgresHeapScanLocalState() {
//

PostgresHeapScanFunction::PostgresHeapScanFunction()
: TableFunction("postgres_heap_scan", {}, PostgresHeapScanFunc, PostgresHeapBind, PostgresHeapInitGlobal,
PostgresHeapInitLocal) {
named_parameters["table"] = duckdb::LogicalType::POINTER;
: TableFunction("postgres_heap_scan", {}, PostgresHeapScanFunc, PostgresHeapBind, PostgresHeapInitGlobal,
PostgresHeapInitLocal) {
named_parameters["relid"] = duckdb::LogicalType::UINTEGER;
named_parameters["snapshot"] = duckdb::LogicalType::POINTER;
projection_pushdown = true;
filter_pushdown = true;
Expand All @@ -73,16 +79,16 @@ PostgresHeapScanFunction::PostgresHeapScanFunction()

duckdb::unique_ptr<duckdb::FunctionData>
PostgresHeapScanFunction::PostgresHeapBind(duckdb::ClientContext &context, duckdb::TableFunctionBindInput &input,
duckdb::vector<duckdb::LogicalType> &return_types,
duckdb::vector<duckdb::string> &names) {
auto table = (reinterpret_cast<RangeTblEntry *>(input.named_parameters["table"].GetPointer()));
duckdb::vector<duckdb::LogicalType> &return_types,
duckdb::vector<duckdb::string> &names) {
auto relid = input.named_parameters["relid"].GetValue<uint32_t>();
auto snapshot = (reinterpret_cast<Snapshot>(input.named_parameters["snapshot"].GetPointer()));

auto rel = PostgresHeapSeqScan(table);
auto rel = PostgresHeapSeqScan(relid);
auto tupleDesc = RelationGetDescr(rel.GetRelation());

if (!tupleDesc) {
elog(ERROR, "Failed to get tuple descriptor for relation with OID %u", table->relid);
elog(ERROR, "Failed to get tuple descriptor for relation with OID %u", relid);
return nullptr;
}

Expand All @@ -94,29 +100,29 @@ PostgresHeapScanFunction::PostgresHeapBind(duckdb::ClientContext &context, duckd
names.push_back(col_name);
/* Log column name and type */
elog(DEBUG3, "-- (DuckDB/PostgresHeapBind) Column name: %s, Type: %s --", col_name.c_str(),
duck_type.ToString().c_str());
duck_type.ToString().c_str());
}
return duckdb::make_uniq<PostgresHeapScanFunctionData>(std::move(rel), snapshot);
}

duckdb::unique_ptr<duckdb::GlobalTableFunctionState>
PostgresHeapScanFunction::PostgresHeapInitGlobal(duckdb::ClientContext &context,
duckdb::TableFunctionInitInput &input) {
duckdb::TableFunctionInitInput &input) {
auto &bind_data = input.bind_data->CastNoConst<PostgresHeapScanFunctionData>();
return duckdb::make_uniq<PostgresHeapScanGlobalState>(bind_data.m_relation, input);
}

duckdb::unique_ptr<duckdb::LocalTableFunctionState>
PostgresHeapScanFunction::PostgresHeapInitLocal(duckdb::ExecutionContext &context,
duckdb::TableFunctionInitInput &input,
duckdb::GlobalTableFunctionState *gstate) {
duckdb::TableFunctionInitInput &input,
duckdb::GlobalTableFunctionState *gstate) {
auto &bind_data = input.bind_data->CastNoConst<PostgresHeapScanFunctionData>();
return duckdb::make_uniq<PostgresHeapScanLocalState>(bind_data.m_relation);
}

void
PostgresHeapScanFunction::PostgresHeapScanFunc(duckdb::ClientContext &context, duckdb::TableFunctionInput &data_p,
duckdb::DataChunk &output) {
duckdb::DataChunk &output) {
auto &bind_data = data_p.bind_data->CastNoConst<PostgresHeapScanFunctionData>();
auto &l_data = data_p.local_state->Cast<PostgresHeapScanLocalState>();

Expand All @@ -142,69 +148,97 @@ PostgresHeapScanFunction::PostgresHeapScanFunc(duckdb::ClientContext &context, d
// PostgresHeapReplacementScan
//

static RangeTblEntry *
FindMatchingHeapRelation(List *tables, const duckdb::string &to_find) {
ListCell *lc;

foreach (lc, tables) {
RangeTblEntry *table = (RangeTblEntry *)lfirst(lc);
if (table->relid) {
auto rel = RelationIdGetRelation(table->relid);
if (!RelationIsValid(rel)) {
elog(ERROR, "Relation with OID %u is not valid", table->relid);
return nullptr;
}
/* Allow only heap tables */
if (!rel->rd_amhandler || (GetTableAmRoutine(rel->rd_amhandler) != GetHeapamTableAmRoutine())) {
/* This doesn't have an access method handler, we cant read from this */
RelationClose(rel);
return nullptr;
}
RangeVar *tableRangeVar = makeRangeVarFromNameList(stringToQualifiedNameList(to_find.c_str(), NULL));
Oid relOid = RangeVarGetRelid(tableRangeVar, AccessShareLock, true);
if (table->relid == relOid) {
RelationClose(rel);
return table;
}
RelationClose(rel);
}
static Oid
FindMatchingRelation(const duckdb::string &to_find) {
RangeVar *tableRangeVar = makeRangeVarFromNameList(stringToQualifiedNameList(to_find.c_str(), NULL));
Oid relOid = RangeVarGetRelid(tableRangeVar, AccessShareLock, true);
if (relOid != InvalidOid) {
return relOid;
}

return nullptr;
return InvalidOid;
}

static duckdb::vector<duckdb::unique_ptr<duckdb::ParsedExpression>>
CreateFunctionArguments(RangeTblEntry *table, Snapshot snapshot) {
CreateFunctionArguments(Oid relid, Snapshot snapshot) {
duckdb::vector<duckdb::unique_ptr<duckdb::ParsedExpression>> children;
children.push_back(duckdb::make_uniq<duckdb::ComparisonExpression>(
duckdb::ExpressionType::COMPARE_EQUAL, duckdb::make_uniq<duckdb::ColumnRefExpression>("table"),
duckdb::make_uniq<duckdb::ConstantExpression>(duckdb::Value::POINTER(duckdb::CastPointerToValue(table)))));
duckdb::ExpressionType::COMPARE_EQUAL, duckdb::make_uniq<duckdb::ColumnRefExpression>("relid"),
duckdb::make_uniq<duckdb::ConstantExpression>(duckdb::Value::UINTEGER(relid))));

children.push_back(duckdb::make_uniq<duckdb::ComparisonExpression>(
duckdb::ExpressionType::COMPARE_EQUAL, duckdb::make_uniq<duckdb::ColumnRefExpression>("snapshot"),
duckdb::make_uniq<duckdb::ConstantExpression>(duckdb::Value::POINTER(duckdb::CastPointerToValue(snapshot)))));
duckdb::ExpressionType::COMPARE_EQUAL, duckdb::make_uniq<duckdb::ColumnRefExpression>("snapshot"),
duckdb::make_uniq<duckdb::ConstantExpression>(duckdb::Value::POINTER(duckdb::CastPointerToValue(snapshot)))));
return children;
}

duckdb::unique_ptr<duckdb::TableRef> ReplaceView(Oid view) {
auto oid = ObjectIdGetDatum(view);
Datum viewdef = DirectFunctionCall1(pg_get_viewdef, oid);
auto view_definition = text_to_cstring(DatumGetTextP(viewdef));

if (!view_definition) {
elog(ERROR, "Could not retrieve view definition for Relation with relid: %u", view);
}

duckdb::Parser parser;
parser.ParseQuery(view_definition);
auto statements = std::move(parser.statements);
if (statements.size() != 1) {
elog(ERROR, "View definition contained more than 1 statement!");
}

if (statements[0]->type != duckdb::StatementType::SELECT_STATEMENT) {
elog(ERROR, "View definition (%s) did not contain a SELECT statement!", view_definition);
}

auto select = duckdb::unique_ptr_cast<duckdb::SQLStatement, duckdb::SelectStatement>(std::move(statements[0]));
auto subquery = duckdb::make_uniq<duckdb::SubqueryRef>(std::move(select));
return std::move(subquery);
}

duckdb::unique_ptr<duckdb::TableRef>
PostgresHeapReplacementScan(duckdb::ClientContext &context, duckdb::ReplacementScanInput &input,
duckdb::optional_ptr<duckdb::ReplacementScanData> data) {
duckdb::optional_ptr<duckdb::ReplacementScanData> data) {

auto &table_name = input.table_name;
auto &scan_data = reinterpret_cast<PostgresHeapReplacementScanData &>(*data);

/* Check name against query table list and verify that it is heap table */
auto table = FindMatchingHeapRelation(scan_data.m_tables, table_name);
auto relid = FindMatchingRelation(table_name);

if (relid == InvalidOid) {
return nullptr;
}

// Check if the Relation is a VIEW
auto tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
if (!HeapTupleIsValid(tuple)) {
elog(ERROR, "Cache lookup failed for relation %u", relid);
}

auto relForm = (Form_pg_class) GETSTRUCT(tuple);

// Check if the relation is a view
if (relForm->relkind == RELKIND_VIEW) {
ReleaseSysCache(tuple);
return ReplaceView(relid);
}
ReleaseSysCache(tuple);

if (!table) {
auto rel = RelationIdGetRelation(relid);
/* Allow only heap tables */
if (!rel->rd_amhandler || (GetTableAmRoutine(rel->rd_amhandler) != GetHeapamTableAmRoutine())) {
/* This doesn't have an access method handler, we cant read from this */
RelationClose(rel);
return nullptr;
}
RelationClose(rel);

// Create POINTER values from the 'table' and 'snapshot' variables
auto children = CreateFunctionArguments(table, GetActiveSnapshot());
auto children = CreateFunctionArguments(relid, GetActiveSnapshot());
auto table_function = duckdb::make_uniq<duckdb::TableFunctionRef>();
table_function->function = duckdb::make_uniq<duckdb::FunctionExpression>("postgres_heap_scan", std::move(children));
table_function->alias = table->alias ? table->alias->aliasname : table_name;
table_function->alias = table_name;

return std::move(table_function);
}
Expand Down
12 changes: 6 additions & 6 deletions src/quack_heap_seq_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ extern "C" {
#include <thread>

namespace quack {
PostgresHeapSeqScan::PostgresHeapSeqScan(RangeTblEntry *table)
: m_table(table), m_rel(nullptr), m_snapshot(nullptr) {
PostgresHeapSeqScan::PostgresHeapSeqScan(Oid relid)
: m_relid(relid), m_rel(nullptr), m_snapshot(nullptr) {
}

PostgresHeapSeqScan::~PostgresHeapSeqScan() {
Expand All @@ -26,15 +26,15 @@ PostgresHeapSeqScan::~PostgresHeapSeqScan() {
}

PostgresHeapSeqScan::PostgresHeapSeqScan(PostgresHeapSeqScan &&other)
: m_table(other.m_table), m_rel(nullptr) {
: m_relid(other.m_relid), m_rel(nullptr) {
other.CloseRelation();
other.m_table = nullptr;
other.m_relid = InvalidOid;
}

Relation
PostgresHeapSeqScan::GetRelation() {
if (m_table && m_rel == nullptr) {
m_rel = RelationIdGetRelation(m_table->relid);
if (m_relid != InvalidOid && m_rel == nullptr) {
m_rel = RelationIdGetRelation(m_relid);
}
return m_rel;
}
Expand Down
1 change: 1 addition & 0 deletions test/regression/expected/search_path.out
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,4 @@ SELECT count(*) FROM public.t, other.t;
DROP TABLE other.t;
DROP SCHEMA other;
RESET search_path;
drop table t;
40 changes: 40 additions & 0 deletions test/regression/expected/views.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
create table view_table(a varchar);
insert into view_table values ('test'), ('hello');
create view vw as select * from view_table;
select * from vw;
a
-------
test
hello
(2 rows)

select * from vw offset 1;
a
-------
hello
(1 row)

select * from vw limit 1;
a
------
test
(1 row)

drop view vw;
create schema s;
create table s.t as select 21;
create table "s.t" as select 42;
create view vw1 as select * from s.t;
create view vw2 as select * from "s.t";
select * from vw1, vw2;
?column? | ?column?
----------+----------
21 | 21
(1 row)

drop view vw1;
drop view vw2;
drop table "s.t";
drop table s.t;
drop schema s;
drop table view_table;
1 change: 1 addition & 0 deletions test/regression/schedule
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ test: search_path
test: execution_error
test: type_support
test: array_type_support
test: views
test: projection_pushdown_unsupported_type
4 changes: 3 additions & 1 deletion test/regression/sql/search_path.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,6 @@ SELECT count(*) FROM public.t, other.t;
-- Cleanup
DROP TABLE other.t;
DROP SCHEMA other;
RESET search_path;
RESET search_path;

drop table t;
26 changes: 26 additions & 0 deletions test/regression/sql/views.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
create table view_table(a varchar);
insert into view_table values ('test'), ('hello');

create view vw as select * from view_table;

select * from vw;
select * from vw offset 1;
select * from vw limit 1;

drop view vw;

create schema s;
create table s.t as select 21;
create table "s.t" as select 42;

create view vw1 as select * from s.t;
create view vw2 as select * from "s.t";

select * from vw1, vw2;

drop view vw1;
drop view vw2;
drop table "s.t";
drop table s.t;
drop schema s;
drop table view_table;

0 comments on commit 726db29

Please sign in to comment.