-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pipeline: refactor such that user can return the DF for debugging
- refactor all read_* and create_* functions into one create_tbl function that can optionally return the table as a DataFrame for easier debugging
- use function dispatch to simplify the implementation (thanks @clizbe!)
- add custom exceptions for cleaner user feedback

Fixes: #14, #18
- Loading branch information
Showing 4 changed files with 153 additions and 99 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
module TulipaIO | ||
|
||
include("exceptions.jl") | ||
include("parsers.jl") | ||
include("pipeline.jl") | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import DuckDB: DB | ||
|
||
"""
    FileNotFoundError(file)

Exception carrying a path (`file`) and a ready-to-print message (`msg`).

The message is chosen when the exception is constructed: if `ispath(file)`
is true it says the path exists but is not a regular file, otherwise it
says the file was not found.
"""
struct FileNotFoundError <: Exception
    file::String
    msg::String
    function FileNotFoundError(file)
        # The wording depends only on whether *something* exists at `file`.
        msg = if ispath(file)
            "$(file): exists, but not a regular file"
        else
            "$(file): file not found"
        end
        return new(file, msg)
    end
end
|
||
"""
    DirectoryNotFoundError(dir)

Exception carrying a path (`dir`) and a ready-to-print message (`msg`).

The message is chosen when the exception is constructed: if `ispath(dir)`
is true it says the path exists but is not a directory, otherwise it says
the directory was not found.
"""
struct DirectoryNotFoundError <: Exception
    dir::String
    msg::String
    function DirectoryNotFoundError(dir)
        # The wording depends only on whether *something* exists at `dir`.
        msg = if ispath(dir)
            "$(dir): exists, but not a directory"
        else
            "$(dir): directory not found"
        end
        return new(dir, msg)
    end
end
|
||
"""
    TableNotFoundError(con, tbl)

Exception recording the connection (`con`) and the table name (`tbl`) of a
failed table lookup. The printable message (`msg`) is built at construction
time from both values.
"""
struct TableNotFoundError <: Exception
    con::DB
    tbl::String
    msg::String
    function TableNotFoundError(con, tbl)
        return new(con, tbl, "$(tbl): table not found in $(con)")
    end
end
|
||
"""
    NeitherTableNorFileError(con, src)
    NeitherTableNorFileError(con, src, msg)

Exception recording the connection (`con`) and the source name (`src`) that
resolved to neither a table nor a file. The printable message (`msg`) is
always generated from `con` and `src`.

The two-argument form is the intended constructor and mirrors
`TableNotFoundError(con, tbl)`. The three-argument form is retained only so
existing callers keep working; as before, its `msg` argument is ignored.
"""
struct NeitherTableNorFileError <: Exception
    con::DB
    src::String
    msg::String
    function NeitherTableNorFileError(con, src)
        return new(con, src, "$(src): neither table ($con) nor file found")
    end
    # Backward-compatible signature: the caller-supplied `msg` was never used,
    # so keep discarding it and build the standard message instead.
    NeitherTableNorFileError(con, src, msg) = NeitherTableNorFileError(con, src)
end
|
||
# Each custom exception stores its full, user-facing text in `msg`, so
# displaying the error amounts to printing that field and nothing else.
function Base.showerror(io::IO, exc::FileNotFoundError)
    return print(io, exc.msg)
end

function Base.showerror(io::IO, exc::DirectoryNotFoundError)
    return print(io, exc.msg)
end

function Base.showerror(io::IO, exc::TableNotFoundError)
    return print(io, exc.msg)
end

function Base.showerror(io::IO, exc::NeitherTableNorFileError)
    return print(io, exc.msg)
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
"""
    FmtSQL

String-formatting helpers that assemble SQL fragments: reader-function
calls for input files, `SELECT` statements over them, and a `LEFT JOIN`
query that merges columns from one subquery into another.

All functions only build and return strings; nothing here executes SQL.
"""
module FmtSQL

# `sprintf` relies on these; a submodule does not inherit its parent's imports.
using Printf: Format, format

"""
    sprintf(fmt::String, args...)

Format `args` with the printf-style format string `fmt` and return the
resulting string. Unlike the `@sprintf` macro, `fmt` may be a runtime value.
"""
function sprintf(fmt::String, args...)
    return format(Format(fmt), args...)
end

"""
    fmt_opts(source::String; opts...)

Return the comma-separated argument list for a reader call: `source`
first, followed by one `key=value` entry per keyword option.

`source` is wrapped in single quotes unless it contains a `?`, in which
case it is emitted verbatim. Option values are interpolated as-is (no SQL
quoting is applied).
"""
function fmt_opts(source::String; opts...)
    # NOTE(review): a `?` presumably marks a placeholder to be bound later,
    # which is why it must stay unquoted -- confirm against the caller.
    src = '?' in source ? source : "'$(source)'"
    parts = [src]
    append!(parts, (join(p, "=") for p in opts))
    return join(parts, ", ")
end

"""
    reader(source::String)

Return the name of the reader function for `source`, derived from its file
extension (`.csv`, `.parquet` or `.json` give `read_<ext>_auto`).

A source without a recognised extension that contains a `?` falls back to
`read_csv_auto`. Anything else raises an `ErrorException`.
"""
function reader(source::String)
    _, ext = splitext(source)
    if ext in (".csv", ".parquet", ".json")
        # NOTE(review): verify every generated `read_*_auto` name exists in
        # the targeted SQL engine.
        return "read_$(ext[2:end])_auto"
    elseif '?' in source
        # FIXME: how to support other file formats?
        return "read_csv_auto"
    else
        error("$(ext[2:end]): unsupported input file '$(source)'")
    end
end

"""
    fmt_read(source::String, args...; opts...)

Return the reader call for `source`, e.g. `read_csv_auto('f.csv', header=true)`.

Options are normally given as keywords. For backward compatibility they
may also be given positionally as `Symbol => value` pairs; both forms are
forwarded to [`fmt_opts`](@ref).
"""
function fmt_read(source::String, args...; opts...)
    # Positional pairs are splatted as keywords, exactly as the original
    # positional-only signature did.
    return sprintf("%s(%s)", reader(source), fmt_opts(source; args..., opts...))
end

"""
    fmt_select(source::String; opts...)

Return a `SELECT * FROM <reader call>` statement for `source`, forwarding
the keyword options to [`fmt_read`](@ref).
"""
function fmt_select(source::String; opts...)
    return sprintf("SELECT * FROM %s", fmt_read(source; opts...))
end

# Build the select-list entries for the joined columns; the strategy is
# chosen by dispatch on the type of `fill`.
function _fill_columns(cols, fill::Bool)
    if fill
        # back fill, e.g.: IFNULL(t2.investable, t1.investable) AS investable
        return join(("IFNULL(t2.$c, t1.$c) AS $c" for c in cols), ", ")
    end
    # explicit missing: take the joined column as-is
    return join(("t2.$c" for c in cols), ", ")
end

function _fill_columns(cols, fill::AbstractVector)
    if length(fill) != length(cols)
        msg = "number of default values does not match columns\n"
        msg = msg * "columns: $cols\n"
        msg = msg * "defaults: $fill"
        error(msg)
    end
    # Defaults are interpolated verbatim, so a string default must already
    # carry whatever SQL quoting it needs.
    return join(("IFNULL(t2.$c, $f) AS $c" for (c, f) in zip(cols, fill)), ", ")
end

"""
    fmt_join(from_subquery, join_subquery; on, cols, fill)

Return a query that left-joins `join_subquery` (aliased `t2`) onto
`from_subquery` (aliased `t1`) and replaces the columns `cols` of `t1` with
values taken from `t2`.

# Keywords
- `on::Vector{String}`: columns that must be equal in both subqueries.
- `cols::Vector{String}`: columns excluded from `t1.*` and re-selected.
- `fill`: how a `NULL` in `t2` is handled for each column in `cols`:
    - `true`: fall back to the value from `t1`;
    - `false`: keep the `t2` value unchanged;
    - a vector: fall back to the default at the same position as the
      column; its length must equal `length(cols)`.

# Throws
- `ErrorException` if `fill` is a vector whose length differs from `cols`.
"""
function fmt_join(
    from_subquery::String,
    join_subquery::String;
    on::Vector{String},
    cols::Vector{String},
    fill::Union{Bool,AbstractVector},
)
    exclude = join(cols, ", ")
    selected = _fill_columns(cols, fill)
    select_ = "SELECT t1.* EXCLUDE ($exclude), $selected"

    join_on = join(("t1.$c = t2.$c" for c in on), " AND ")
    from_ = "FROM $from_subquery t1 LEFT JOIN $join_subquery t2 ON ($join_on)"

    return "$(select_)\n$(from_)"
end

end # module FmtSQL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters