From 2e68607ee1ac4da9637be864f1c7a4f837742b34 Mon Sep 17 00:00:00 2001 From: Lorenzo Mangani Date: Mon, 9 Sep 2024 22:37:22 +0200 Subject: [PATCH] Basic ClickHouse HTTP query wrapper (#8) * Basic ClickHouse http query macro wrapper --- CMakeLists.txt | 2 +- src/chsql_extension.cpp | 3 +- src/default_table_functions.cpp | 148 ------------------------ src/include/default_functions.hpp | 41 ------- src/include/default_table_functions.hpp | 47 -------- 5 files changed, 3 insertions(+), 238 deletions(-) delete mode 100644 src/default_table_functions.cpp delete mode 100644 src/include/default_functions.hpp delete mode 100644 src/include/default_table_functions.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 26ca360..fc06a4c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ set(LOADABLE_EXTENSION_NAME ${TARGET_NAME}_loadable_extension) project(${TARGET_NAME}) include_directories(src/include) -set(EXTENSION_SOURCES src/chsql_extension.cpp src/default_table_functions.cpp) +set(EXTENSION_SOURCES src/chsql_extension.cpp) build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES}) build_loadable_extension(${TARGET_NAME} " " ${EXTENSION_SOURCES}) diff --git a/src/chsql_extension.cpp b/src/chsql_extension.cpp index 9a7e868..feae57e 100644 --- a/src/chsql_extension.cpp +++ b/src/chsql_extension.cpp @@ -143,7 +143,8 @@ static DefaultMacro chsql_macros[] = { static const DefaultTableMacro chsql_table_macros[] = { {DEFAULT_SCHEMA, "tableMultiply", {"x", nullptr}, {{"two", "2"}, {nullptr, nullptr}}, R"(SELECT x * two as output_column;)"}, {DEFAULT_SCHEMA, "numbers", {"x", nullptr}, {{"z", "0"}, {nullptr, nullptr}}, R"(SELECT * as number FROM generate_series(z,x-1);)"}, - {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr} + {DEFAULT_SCHEMA, "ch_scan", {"query", "server"}, {{"format", "JSONEachRow"}, {"user", "play"}, {nullptr, nullptr}}, R"(SELECT * FROM read_json_auto(concat(server, '/?default_format=', format, '&user=', user, '&query=', query)))"}, + {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr} }; // clang-format on diff --git a/src/default_table_functions.cpp b/src/default_table_functions.cpp deleted file mode 100644 index b0755c8..0000000 --- a/src/default_table_functions.cpp +++ /dev/null @@ -1,148 +0,0 @@ -#include "duckdb/catalog/default/default_table_functions.hpp" -#include "duckdb/catalog/catalog_entry/table_macro_catalog_entry.hpp" -#include "duckdb/parser/parser.hpp" -#include "duckdb/parser/parsed_data/create_macro_info.hpp" -#include "duckdb/parser/statement/select_statement.hpp" -#include "duckdb/function/table_macro_function.hpp" - -namespace duckdb { - -// clang-format off -static const DefaultTableMacro internal_table_macros[] = { - {DEFAULT_SCHEMA, "histogram_values", {"source", "col_name", nullptr}, {{"bin_count", "10"}, {"technique", "'auto'"}, {nullptr, nullptr}}, R"( -WITH bins AS ( - SELECT - CASE - WHEN (NOT (can_cast_implicitly(MIN(col_name), NULL::BIGINT) OR - can_cast_implicitly(MIN(col_name), NULL::DOUBLE) OR - can_cast_implicitly(MIN(col_name), NULL::TIMESTAMP)) AND technique='auto') - OR technique='sample' - THEN - approx_top_k(col_name, bin_count) - WHEN technique='equi-height' - THEN - quantile(col_name, [x / bin_count::DOUBLE for x in generate_series(1, bin_count)]) - WHEN technique='equi-width' - THEN - equi_width_bins(MIN(col_name), MAX(col_name), bin_count, false) - WHEN technique='equi-width-nice' OR technique='auto' - THEN - equi_width_bins(MIN(col_name), MAX(col_name), bin_count, true) - ELSE - error(concat('Unrecognized technique ', technique)) - END AS bins - FROM query_table(source::VARCHAR) - ) -SELECT UNNEST(map_keys(histogram)) AS bin, UNNEST(map_values(histogram)) AS count -FROM ( - SELECT CASE - WHEN (NOT (can_cast_implicitly(MIN(col_name), NULL::BIGINT) OR - can_cast_implicitly(MIN(col_name), NULL::DOUBLE) OR - can_cast_implicitly(MIN(col_name), NULL::TIMESTAMP)) AND technique='auto') - OR technique='sample' - THEN - histogram_exact(col_name, bins) - ELSE - histogram(col_name, bins) - END AS histogram - FROM query_table(source::VARCHAR), bins -); -)"}, - {DEFAULT_SCHEMA, "histogram", {"source", "col_name", nullptr}, {{"bin_count", "10"}, {"technique", "'auto'"}, {nullptr, nullptr}}, R"( -SELECT - CASE - WHEN is_histogram_other_bin(bin) - THEN '(other values)' - WHEN (NOT (can_cast_implicitly(bin, NULL::BIGINT) OR - can_cast_implicitly(bin, NULL::DOUBLE) OR - can_cast_implicitly(bin, NULL::TIMESTAMP)) AND technique='auto') - OR technique='sample' - THEN bin::VARCHAR - WHEN row_number() over () = 1 - THEN concat('x <= ', bin::VARCHAR) - ELSE concat(lag(bin::VARCHAR) over (), ' < x <= ', bin::VARCHAR) - END AS bin, - count, - bar(count, 0, max(count) over ()) AS bar -FROM histogram_values(source, col_name, bin_count := bin_count, technique := technique); -)"}, - {nullptr, nullptr, {nullptr}, {{nullptr, nullptr}}, nullptr} - }; -// clang-format on - -DefaultTableFunctionGenerator::DefaultTableFunctionGenerator(Catalog &catalog, SchemaCatalogEntry &schema) - : DefaultGenerator(catalog), schema(schema) { -} - -unique_ptr -DefaultTableFunctionGenerator::CreateInternalTableMacroInfo(const DefaultTableMacro &default_macro, - unique_ptr function) { - for (idx_t param_idx = 0; default_macro.parameters[param_idx] != nullptr; param_idx++) { - function->parameters.push_back(make_uniq(default_macro.parameters[param_idx])); - } - for (idx_t named_idx = 0; default_macro.named_parameters[named_idx].name != nullptr; named_idx++) { - auto expr_list = Parser::ParseExpressionList(default_macro.named_parameters[named_idx].default_value); - if (expr_list.size() != 1) { - throw InternalException("Expected a single expression"); - } - function->default_parameters.insert( - make_pair(default_macro.named_parameters[named_idx].name, std::move(expr_list[0]))); - } - - auto type = CatalogType::TABLE_MACRO_ENTRY; - auto bind_info = make_uniq(type); - bind_info->schema = default_macro.schema; - bind_info->name = default_macro.name; - bind_info->temporary = true; - bind_info->internal = true; - bind_info->macros.push_back(std::move(function)); - return bind_info; -} - -unique_ptr -DefaultTableFunctionGenerator::CreateTableMacroInfo(const DefaultTableMacro &default_macro) { - Parser parser; - parser.ParseQuery(default_macro.macro); - if (parser.statements.size() != 1 || parser.statements[0]->type != StatementType::SELECT_STATEMENT) { - throw InternalException("Expected a single select statement in CreateTableMacroInfo internal"); - } - auto node = std::move(parser.statements[0]->Cast().node); - - auto result = make_uniq(std::move(node)); - return CreateInternalTableMacroInfo(default_macro, std::move(result)); -} - -static unique_ptr GetDefaultTableFunction(const string &input_schema, const string &input_name) { - auto schema = StringUtil::Lower(input_schema); - auto name = StringUtil::Lower(input_name); - for (idx_t index = 0; internal_table_macros[index].name != nullptr; index++) { - if (internal_table_macros[index].schema == schema && internal_table_macros[index].name == name) { - return DefaultTableFunctionGenerator::CreateTableMacroInfo(internal_table_macros[index]); - } - } - return nullptr; -} - -unique_ptr DefaultTableFunctionGenerator::CreateDefaultEntry(ClientContext &context, - const string &entry_name) { - auto info = GetDefaultTableFunction(schema.name, entry_name); - if (info) { - return make_uniq_base(catalog, schema, info->Cast()); - } - return nullptr; -} - -vector DefaultTableFunctionGenerator::GetDefaultEntries() { - vector result; - for (idx_t index = 0; internal_table_macros[index].name != nullptr; index++) { - if (StringUtil::Lower(internal_table_macros[index].name) != internal_table_macros[index].name) { - throw InternalException("Default macro name %s should be lowercase", internal_table_macros[index].name); - } - if (internal_table_macros[index].schema == schema.name) { - result.emplace_back(internal_table_macros[index].name); - } - } - return result; -} - -} // namespace duckdb diff --git a/src/include/default_functions.hpp b/src/include/default_functions.hpp deleted file mode 100644 index 3466585..0000000 --- a/src/include/default_functions.hpp +++ /dev/null @@ -1,41 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/catalog/default/default_functions.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb/catalog/default/default_generator.hpp" -#include "duckdb/parser/parsed_data/create_macro_info.hpp" - -namespace duckdb { -class SchemaCatalogEntry; - -struct DefaultMacro { - const char *schema; - const char *name; - const char *parameters[8]; - const char *macro; -}; - -class DefaultFunctionGenerator : public DefaultGenerator { -public: - DefaultFunctionGenerator(Catalog &catalog, SchemaCatalogEntry &schema); - - SchemaCatalogEntry &schema; - - DUCKDB_API static unique_ptr CreateInternalMacroInfo(const DefaultMacro &default_macro); - -public: - unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; - vector GetDefaultEntries() override; - -private: - static unique_ptr CreateInternalMacroInfo(const DefaultMacro &default_macro, - unique_ptr function); -}; - -} // namespace duckdb diff --git a/src/include/default_table_functions.hpp b/src/include/default_table_functions.hpp deleted file mode 100644 index c0eee28..0000000 --- a/src/include/default_table_functions.hpp +++ /dev/null @@ -1,47 +0,0 @@ -//===----------------------------------------------------------------------===// -// DuckDB -// -// duckdb/catalog/default/default_table_functions.hpp -// -// -//===----------------------------------------------------------------------===// - -#pragma once - -#include "duckdb/catalog/default/default_generator.hpp" -#include "duckdb/parser/parsed_data/create_macro_info.hpp" - -namespace duckdb { -class SchemaCatalogEntry; - -struct DefaultNamedParameter { - const char *name; - const char *default_value; -}; - -struct DefaultTableMacro { - const char *schema; - const char *name; - const char *parameters[8]; - DefaultNamedParameter named_parameters[8]; - const char *macro; -}; - -class DefaultTableFunctionGenerator : public DefaultGenerator { -public: - DefaultTableFunctionGenerator(Catalog &catalog, SchemaCatalogEntry &schema); - - SchemaCatalogEntry &schema; - -public: - unique_ptr CreateDefaultEntry(ClientContext &context, const string &entry_name) override; - vector GetDefaultEntries() override; - - static unique_ptr CreateTableMacroInfo(const DefaultTableMacro &default_macro); - -private: - static unique_ptr CreateInternalTableMacroInfo(const DefaultTableMacro &default_macro, - unique_ptr function); -}; - -} // namespace duckdb