Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change import to using in test #83

Merged
merged 2 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions test/runtests.jl
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
using TulipaIO: TulipaIO

import Test: @test, @testset, @test_throws
using Test: Test, @test, @testset, @test_throws

const DATA = joinpath(@__DIR__, "data")

Expand Down
15 changes: 9 additions & 6 deletions test/test-convenience.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
using CSV, DataFrames, DuckDB, TulipaIO
using CSV: CSV
using DataFrames: DataFrames, DataFrame
using DuckDB: DuckDB, DBInterface
using TulipaIO: TulipaIO

@testset "Test convenience functions" begin
@testset "Read CSV folder" begin
Expand All @@ -12,7 +15,7 @@ using CSV, DataFrames, DuckDB, TulipaIO
end

connection = DBInterface.connect(DuckDB.DB)
read_csv_folder(connection, tmpdir)
TulipaIO.read_csv_folder(connection, tmpdir)
@test (DBInterface.execute(connection, "SHOW TABLES") |> DataFrame |> df -> df.name) ==
["some_file"]
end
Expand All @@ -23,16 +26,16 @@ using CSV, DataFrames, DuckDB, TulipaIO
"rep_periods_mapping" =>
Dict(:period => "INT", :rep_period => "VARCHAR", :weight => "DOUBLE"),
)
read_csv_folder(con, "data/Norse"; schemas)
TulipaIO.read_csv_folder(con, "data/Norse"; schemas)
df_types = DuckDB.query(con, "DESCRIBE rep_periods_mapping") |> DataFrame
@test df_types.column_name == ["period", "rep_period", "weight"]
@test df_types.column_type == ["INTEGER", "VARCHAR", "DOUBLE"]
end

@testset "Test show_tables and get_table" begin
connection = DBInterface.connect(DuckDB.DB)
create_tbl(connection, "data/Norse/assets-data.csv"; name = "my_table")
@test show_tables(connection).name == ["my_table"]
@test "Asgard_Battery" in get_table(connection, "my_table").name
TulipaIO.create_tbl(connection, "data/Norse/assets-data.csv"; name = "my_table")
@test TulipaIO.show_tables(connection).name == ["my_table"]
@test "Asgard_Battery" in TulipaIO.get_table(connection, "my_table").name
end
end
169 changes: 93 additions & 76 deletions test/test-pipeline.jl
Original file line number Diff line number Diff line change
@@ -1,16 +1,14 @@
using CSV: CSV
import DataFrames as DF
import DuckDB: DuckDB as DD, DB, DBInterface
using DataFrames: DataFrames, DataFrame
using DuckDB: DuckDB, DBInterface

TIO = TulipaIO

function shape(df::DF.DataFrame)
return (DF.nrow(df), DF.ncol(df))
function shape(df::DataFrame)
return (DataFrames.nrow(df), DataFrames.ncol(df))
end

function tmp_tbls(con::DB)
function tmp_tbls(con::DuckDB.DB)
res = DBInterface.execute(con, "SELECT name FROM (SHOW ALL TABLES) WHERE temporary = true")
return DF.DataFrame(res)
return DataFrame(res)
end

"""
Expand All @@ -19,47 +17,47 @@ end
When row order is different, do a join to determine equality; use the
columns `cols`, join on `on` (often :name). The resulting DataFrame
is returned. It uniquifies columns with clashing names (see
`?DF.leftjoin`), and stores a "source" under the `:source` column.
`?DataFrames.leftjoin`), and stores a "source" under the `:source` column.

"""
function join_cmp(df1, df2, cols; on::Union{Symbol, Vector{Symbol}})
DF.leftjoin(df1[!, cols], df2[!, cols]; on = on, makeunique = true, source = :source)
DataFrames.leftjoin(df1[!, cols], df2[!, cols]; on = on, makeunique = true, source = :source)
end

@testset "Utilities" begin
csv_path = joinpath(DATA, "Norse/assets-data.csv")

@testset "get_tbl_name(source, tmp)" begin
for (name, tmp) in [["my_file", false], ["t_my_file", true]]
@test name == TIO.get_tbl_name("path/my-file.csv", tmp)
@test name == TulipaIO.get_tbl_name("path/my-file.csv", tmp)
end
end

# redundant for the current implementation, needed when we support globs
@testset "check_file(source)" begin
@test TIO.check_file(csv_path)
@test !TIO.check_file("not-there")
@test TulipaIO.check_file(csv_path)
@test !TulipaIO.check_file("not-there")
end

con = DBInterface.connect(DB)
con = DBInterface.connect(DuckDB.DB)
tbl_name = "mytbl"

@testset "check_tbl(con, source)" begin
DBInterface.execute(con, "CREATE TABLE $tbl_name AS SELECT * FROM range(5)")
@test TIO.check_tbl(con, tbl_name)
@test !TIO.check_tbl(con, "not_there")
@test TulipaIO.check_tbl(con, tbl_name)
@test !TulipaIO.check_tbl(con, "not_there")
end

@testset "Conditionally format source as SQL" begin
read_ = TIO.fmt_source(con, csv_path)
read_ = TulipaIO.fmt_source(con, csv_path)
@test occursin("read_csv", read_)
@test occursin(csv_path, read_)
@test TIO.fmt_source(con, tbl_name) == tbl_name
@test_throws TIO.NeitherTableNorFileError TIO.fmt_source(con, "not-there")
@test TulipaIO.fmt_source(con, tbl_name) == tbl_name
@test_throws TulipaIO.NeitherTableNorFileError TulipaIO.fmt_source(con, "not-there")
if (VERSION.major >= 1) && (VERSION.minor >= 8)
msg_re = r"not-there.+"
msg_re *= "$con"
@test_throws msg_re TIO.fmt_source(con, "not-there")
@test_throws msg_re TulipaIO.fmt_source(con, "not-there")
end
end
end
Expand All @@ -69,85 +67,91 @@ end
csv_copy = replace(csv_path, "data.csv" => "data-copy.csv")
csv_fill = replace(csv_path, "data.csv" => "data-alt.csv")

df_org = DF.DataFrame(CSV.File(csv_path; header = 2))
df_org = DataFrame(CSV.File(csv_path; header = 2))

@testset "CSV -> DataFrame" begin
con = DBInterface.connect(DB)
df_res = TIO.create_tbl(con, csv_path; show = true)
con = DBInterface.connect(DuckDB.DB)
df_res = TulipaIO.create_tbl(con, csv_path; show = true)
@test shape(df_org) == shape(df_res)
@test_throws TIO.FileNotFoundError TIO.create_tbl(con, "not-there")
@test_throws TulipaIO.FileNotFoundError TulipaIO.create_tbl(con, "not-there")
if (VERSION.major >= 1) && (VERSION.minor >= 8)
@test_throws r"not-there" TIO.create_tbl(con, "not-there")
@test_throws r"not-there" TulipaIO.create_tbl(con, "not-there")
end
end

@testset "CSV -> DataFrame w/ a schema" begin
con = DBInterface.connect(DB)
con = DBInterface.connect(DuckDB.DB)
mapping_csv_path = joinpath(DATA, "Norse/rep-periods-mapping.csv")
col_schema = Dict(:period => "INT", :rep_period => "VARCHAR", :weight => "DOUBLE")
TIO.create_tbl(con, mapping_csv_path; types = col_schema)
df_types = DD.query(con, "DESCRIBE rep_periods_mapping") |> DF.DataFrame
TulipaIO.create_tbl(con, mapping_csv_path; types = col_schema)
df_types = DuckDB.query(con, "DESCRIBE rep_periods_mapping") |> DataFrame
@test df_types.column_name == ["period", "rep_period", "weight"]
@test df_types.column_type == ["INTEGER", "VARCHAR", "DOUBLE"]
end

opts = Dict(:on => [:name], :cols => [:investable], :show => true)
@testset "CSV w/ alternatives -> DataFrame" begin
con = DBInterface.connect(DB)
df_res = TIO.create_tbl(con, csv_path, csv_copy; opts..., fill = false)
df_exp = DF.DataFrame(CSV.File(csv_copy; header = 2))
con = DBInterface.connect(DuckDB.DB)
df_res = TulipaIO.create_tbl(con, csv_path, csv_copy; opts..., fill = false)
df_exp = DataFrame(CSV.File(csv_copy; header = 2))
@test df_exp.investable == df_res.investable
@test df_org.investable != df_res.investable
end

@testset "no filling for missing rows" begin
con = DBInterface.connect(DB)
df_res = TIO.create_tbl(con, csv_path, csv_fill; opts..., fill = false)
df_ref = DF.DataFrame(CSV.File(csv_fill; header = 2))
con = DBInterface.connect(DuckDB.DB)
df_res = TulipaIO.create_tbl(con, csv_path, csv_fill; opts..., fill = false)
df_ref = DataFrame(CSV.File(csv_fill; header = 2))
# NOTE: row order is different, join to determine equality
cmp = join_cmp(df_res, df_ref, ["name", "investable"]; on = :name)
@test (DF.subset(cmp, :investable_1 => DF.ByRow(ismissing)).source .== "left_only") |> all
@test (DF.subset(cmp, :investable_1 => DF.ByRow(!ismissing)).source .== "both") |> all
@test (
DataFrames.subset(cmp, :investable_1 => DataFrames.ByRow(ismissing)).source .==
"left_only"
) |> all
@test (
DataFrames.subset(cmp, :investable_1 => DataFrames.ByRow(!ismissing)).source .== "both"
) |> all
end

@testset "back-filling missing rows" begin
con = DBInterface.connect(DB)
df_res = TIO.create_tbl(con, csv_path, csv_fill; opts..., fill = true)
df_exp = DF.DataFrame(CSV.File(csv_copy; header = 2))
con = DBInterface.connect(DuckDB.DB)
df_res = TulipaIO.create_tbl(con, csv_path, csv_fill; opts..., fill = true)
df_exp = DataFrame(CSV.File(csv_copy; header = 2))
cmp = join_cmp(df_exp, df_res, ["name", "investable"]; on = :name)
@test all(cmp.investable .== cmp.investable_1)
@test (cmp.source .== "both") |> all
end

@testset "back-filling missing rows w/ alternate values" begin
con = DBInterface.connect(DB)
df_res = TIO.create_tbl(
con = DBInterface.connect(DuckDB.DB)
df_res = TulipaIO.create_tbl(
con,
csv_path,
csv_fill;
opts...,
fill = true,
fill_values = Dict(:investable => true),
)
df_ref = DF.DataFrame(CSV.File(csv_fill; header = 2))
df_ref = DataFrame(CSV.File(csv_fill; header = 2))
cmp = join_cmp(df_res, df_ref, ["name", "investable"]; on = :name)
@test (DF.subset(cmp, :investable_1 => DF.ByRow(ismissing)).investable) |> all
@test (DataFrames.subset(cmp, :investable_1 => DataFrames.ByRow(ismissing)).investable) |>
all
end

@testset "temporary tables" begin
con = DBInterface.connect(DB)
tbl_name = TIO.create_tbl(con, csv_path; name = "tmp_assets", tmp = true)
con = DBInterface.connect(DuckDB.DB)
tbl_name = TulipaIO.create_tbl(con, csv_path; name = "tmp_assets", tmp = true)
@test tbl_name in tmp_tbls(con)[!, :name]

tbl_name = TIO.create_tbl(con, csv_path; tmp = true)
tbl_name = TulipaIO.create_tbl(con, csv_path; tmp = true)
@test tbl_name == "t_assets_data" # t_<cleaned up filename>
@test tbl_name in tmp_tbls(con)[!, :name]
end

@testset "CSV -> table" begin
con = DBInterface.connect(DB)
tbl_name = TIO.create_tbl(con, csv_path; name = "no_assets")
df_res = DF.DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
con = DBInterface.connect(DuckDB.DB)
tbl_name = TulipaIO.create_tbl(con, csv_path; name = "no_assets")
df_res = DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
@test shape(df_org) == shape(df_res)
# @show df_org[1:3, 1:5] df_res[1:3, 1:5]
#
Expand All @@ -169,36 +173,42 @@ end

@testset "table + CSV w/ alternatives -> table" begin
# test setup
con = DBInterface.connect(DB)
TIO.create_tbl(con, csv_path; name = "no_assets")
con = DBInterface.connect(DuckDB.DB)
TulipaIO.create_tbl(con, csv_path; name = "no_assets")

opts = Dict(:on => [:name], :cols => [:investable])
tbl_name =
TIO.create_tbl(con, "no_assets", csv_copy; name = "alt_assets", opts..., fill = false)
df_res = DF.DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
df_exp = DF.DataFrame(CSV.File(csv_copy; header = 2))
tbl_name = TulipaIO.create_tbl(
con,
"no_assets",
csv_copy;
name = "alt_assets",
opts...,
fill = false,
)
df_res = DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
df_exp = DataFrame(CSV.File(csv_copy; header = 2))
@test df_exp.investable == df_res.investable
@test df_org.investable != df_res.investable

@testset "back-filling missing rows" begin
tbl_name = TIO.create_tbl(
tbl_name = TulipaIO.create_tbl(
con,
"no_assets",
csv_fill;
name = "alt_assets_filled",
opts...,
fill = true,
)
df_res = DF.DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
df_exp = DF.DataFrame(CSV.File(csv_copy; header = 2))
df_res = DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
df_exp = DataFrame(CSV.File(csv_copy; header = 2))
# NOTE: row order is different, join to determine equality
cmp = join_cmp(df_exp, df_res, ["name", "investable"]; on = :name)
@test all(cmp.investable .== cmp.investable_1)
@test (cmp.source .== "both") |> all
end

@testset "back-filling missing rows w/ alternate values" begin
tbl_name = TIO.create_tbl(
tbl_name = TulipaIO.create_tbl(
con,
"no_assets",
csv_fill;
Expand All @@ -207,10 +217,12 @@ end
fill = true,
fill_values = Dict(:investable => true),
)
df_res = DF.DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
df_ref = DF.DataFrame(CSV.File(csv_fill; header = 2))
df_res = DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name"))
df_ref = DataFrame(CSV.File(csv_fill; header = 2))
cmp = join_cmp(df_res, df_ref, ["name", "investable"]; on = :name)
@test (DF.subset(cmp, :investable_1 => DF.ByRow(ismissing)).investable) |> all
@test (
DataFrames.subset(cmp, :investable_1 => DataFrames.ByRow(ismissing)).investable
) |> all
end
end
end
Expand All @@ -220,41 +232,46 @@ end
csv_copy = replace(csv_path, "data.csv" => "data-copy.csv")
csv_fill = replace(csv_path, "data.csv" => "data-alt.csv")

df_org = DF.DataFrame(CSV.File(csv_path; header = 2))
df_org = DataFrame(CSV.File(csv_path; header = 2))

opts = Dict(:on => :name, :name => "dummy", :show => true)
@testset "w/ vector" begin
con = DBInterface.connect(DB)
df_exp = DF.DataFrame(CSV.File(csv_copy; header = 2))
df_res = TIO.create_tbl(con, csv_path, Dict(:investable => df_exp.investable); opts...)
con = DBInterface.connect(DuckDB.DB)
df_exp = DataFrame(CSV.File(csv_copy; header = 2))
df_res = TulipaIO.create_tbl(con, csv_path, Dict(:investable => df_exp.investable); opts...)
# NOTE: row order is different, join to determine equality
cmp = join_cmp(df_exp, df_res, ["name", "investable"]; on = :name)
investable = cmp[!, [c for c in propertynames(cmp) if occursin("investable", String(c))]]
@test isequal.(investable[!, 1], investable[!, 2]) |> all

# stupid Julia! grow up!
args = [con, csv_path, Dict(:investable => df_exp.investable[2:end])]
@test_throws DimensionMismatch TIO.create_tbl(args...; opts...)
@test_throws DimensionMismatch TulipaIO.create_tbl(args...; opts...)
if (VERSION.major >= 1) && (VERSION.minor >= 8)
@test_throws r"Length.+different" TIO.create_tbl(args...; opts...)
@test_throws r"index.+value" TIO.create_tbl(args...; opts...)
@test_throws r"Length.+different" TulipaIO.create_tbl(args...; opts...)
@test_throws r"index.+value" TulipaIO.create_tbl(args...; opts...)
end
end

@testset "w/ constant" begin
con = DBInterface.connect(DB)
df_res = TIO.create_tbl(con, csv_path, Dict(:investable => true); opts...)
con = DBInterface.connect(DuckDB.DB)
df_res = TulipaIO.create_tbl(con, csv_path, Dict(:investable => true); opts...)
@test df_res.investable |> all

table_name = TIO.create_tbl(con, csv_path, Dict(:investable => true); on = :name)
table_name = TulipaIO.create_tbl(con, csv_path, Dict(:investable => true); on = :name)
@test "assets_data" == table_name
end

@testset "w/ constant after filtering" begin
con = DBInterface.connect(DB)
where_clause = TIO.FmtSQL.@where_(lifetime in 25:50, name % "Valhalla_%")
df_res =
TIO.create_tbl(con, csv_path, Dict(:investable => true); opts..., where_ = where_clause)
con = DBInterface.connect(DuckDB.DB)
where_clause = TulipaIO.FmtSQL.@where_(lifetime in 25:50, name % "Valhalla_%")
df_res = TulipaIO.create_tbl(
con,
csv_path,
Dict(:investable => true);
opts...,
where_ = where_clause,
)
@test shape(df_res) == shape(df_org)
df_res =
filter(row -> 25 <= row.lifetime <= 50 && startswith(row.name, "Valhalla_"), df_res)
Expand Down
Loading