Skip to content

Commit

Permalink
Create convenience function to read all CSV from folder (#57)
Browse files Browse the repository at this point in the history
  • Loading branch information
abelsiqueira authored Jun 28, 2024
1 parent e9b563d commit 84e566e
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 0 deletions.
3 changes: 3 additions & 0 deletions src/TulipaIO.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,7 @@ include("parsers.jl")
include("fmtsql.jl")
include("pipeline.jl")

# Convenience functions
include("convenience.jl")

end
56 changes: 56 additions & 0 deletions src/convenience.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
export read_csv_folder, show_tables, get_table

"""
read_csv_folder(connection, folder)
Read all CSV files in the `folder` and create a table for each in the `connection`.
## Keywords arguments
- `table_name_prefix = ""`
- `table_name_suffix = ""`
"""
function read_csv_folder(connection, folder; table_name_prefix = "", table_name_suffix = "")
for filename in readdir(folder)
if !endswith(".csv")(filename)
continue
end
table_name, _ = splitext(filename)
table_name = replace(table_name, "-" => "_")
table_name = table_name_prefix * table_name * table_name_suffix
create_tbl(connection, joinpath(folder, filename); name = table_name)
end

return connection
end

"""
df = show_tables(connection)
query = show_tables(Val(:raw), connection)
Run the `SHOW TABLES` sql command.
The `Val(:raw)` variant returns the raw output from DuckDB, otherwise we construct a DataFrame.
"""
function show_tables(::Val{:raw}, connection)
DBInterface.execute(connection, "SHOW TABLES")
end

"""
df = get_table(connection, table_name)
query = get_table(Val(:raw), connection, table_name)
Run the `SELECT * FROM table_name` sql command.
The `Val(:raw)` variant returns the raw output from DuckDB, otherwise we construct a DataFrame.
"""
function get_table(::Val{:raw}, connection, table_name)
DBInterface.execute(connection, "SELECT * FROM $table_name")
end

# Generate the DataFrame-returning method of each convenience function: it calls the
# `Val(:raw)` method of the same function with the same arguments and wraps the result.
# NOTE(review): `DF` is presumably the package-level alias for DataFrames — it is not
# defined in this file, confirm against the main module.
for fname in (:show_tables, :get_table)
    @eval function $fname(con::DBInterface.Connection, args...; kwargs...)
        raw_result = $fname(Val(:raw), con, args...; kwargs...)
        return DF.DataFrame(raw_result)
    end
end
26 changes: 26 additions & 0 deletions test/test-convenience.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
using CSV, DataFrames, DuckDB, TulipaIO

@testset "Test convenience functions" begin
    @testset "Read CSV folder" begin
        # Temporary folder with one CSV file and one non-CSV file that must be skipped
        tmpdir = mktempdir()
        csv_path = joinpath(tmpdir, "some-file.csv")
        txt_path = joinpath(tmpdir, "ignore-this-file.txt")
        CSV.write(csv_path, DataFrame(:a => ["A", "B", "C"], :x => rand(3)))
        open(txt_path, "w") do io
            println(io, "Nothing")
        end

        connection = DBInterface.connect(DuckDB.DB)
        read_csv_folder(connection, tmpdir)
        # Only the CSV file becomes a table, and its dash is turned into an underscore
        @test DataFrame(DBInterface.execute(connection, "SHOW TABLES")).name == ["some_file"]
    end

    @testset "Test show_tables and get_table" begin
        # Path is relative: the test expects to run from the folder containing `data/`
        connection = DBInterface.connect(DuckDB.DB)
        create_tbl(connection, "data/Norse/assets-data.csv"; name = "my_table")
        @test show_tables(connection).name == ["my_table"]
        @test "Asgard_Battery" in get_table(connection, "my_table").name
    end
end

0 comments on commit 84e566e

Please sign in to comment.