From ab7be2f077748662e5fca8ee5dcc0408a55c7104 Mon Sep 17 00:00:00 2001 From: Diego Alejandro Tejada Arango <12887482+datejada@users.noreply.github.com> Date: Mon, 20 Jan 2025 17:16:41 +0100 Subject: [PATCH] Update defaults for reading files (#86) * Update CSV files first row * Update TulipaIO defaults for reading * Update TulipaIO tests * Update gitignore * Update project TOML * Delete the skip row option --- .gitignore | 2 ++ Project.toml | 2 +- src/pipeline.jl | 4 ++-- test/data/Norse/assets-data-alt.csv | 1 - test/data/Norse/assets-data-copy.csv | 1 - test/data/Norse/assets-data.csv | 1 - test/data/Norse/assets-partitions.csv | 1 - test/data/Norse/assets-profiles.csv | 1 - test/data/Norse/flows-data.csv | 1 - test/data/Norse/flows-partitions.csv | 1 - test/data/Norse/flows-profiles.csv | 1 - test/data/Norse/rep-periods-data.csv | 1 - test/data/Norse/rep-periods-mapping.csv | 1 - test/test-pipeline.jl | 20 ++++++++++---------- 14 files changed, 15 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index c3dfb74..f6bdfbe 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ Manifest.toml docs/build/ *.rej node_modules +.vscode +debugging diff --git a/Project.toml b/Project.toml index 21faf3b..0ed9c06 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TulipaIO" uuid = "7b3808b7-0819-42d4-885c-978ba173db11" authors = ["Suvayu Ali and contributors"] -version = "0.2.1" +version = "0.3.0" [deps] CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" diff --git a/src/pipeline.jl b/src/pipeline.jl index e152120..3e71e7d 100644 --- a/src/pipeline.jl +++ b/src/pipeline.jl @@ -5,8 +5,8 @@ using .FmtSQL: fmt_join, fmt_read, fmt_select export create_tbl, tbl_select, as_table -# default options (for now) -_read_opts = pairs((header = true, skip = 1)) +# default options for reading +_read_opts = pairs((header = true,)) function check_file(source::String) # FIXME: handle globs diff --git a/test/data/Norse/assets-data-alt.csv b/test/data/Norse/assets-data-alt.csv index d450145..8d8177a 100644 --- a/test/data/Norse/assets-data-alt.csv +++ b/test/data/Norse/assets-data-alt.csv @@ -1,4 +1,3 @@ -,{true;false} name,investable Asgard_CCGT,false Midgard_Hydro,true diff --git a/test/data/Norse/assets-data-copy.csv b/test/data/Norse/assets-data-copy.csv index 9830f71..782205b 100644 --- a/test/data/Norse/assets-data-copy.csv +++ b/test/data/Norse/assets-data-copy.csv @@ -1,4 +1,3 @@ -,{producer;consumer;storage;hub;conversion},{true;false},{true;false},{true;false},kEUR/MWh,kEUR/MW/year,MW,MW,MW,MW,MW/unit,MW/unit,MW,MW,MWh,MWh,h,kEUR/MW/year,year,0 name,type,active,investable,investment_integer,variable_cost,investment_cost,investment_limit,capacity,initial_capacity,peak_demand,charging_capacity,discharging_capacity,initial_charging_capacity,initial_discharging_capacity,initial_storage_capacity,initial_storage_level,energy_to_power_ratio,fixed_cost,lifetime,efficiency Asgard_Battery,storage,true,true,true,0.003,300,,10,0,0,1,1,0,0,0,,100,3,10,0 Asgard_Solar,producer,true,true,true,0.001,350,50000,100,0,0,0,0,0,0,0,0,0,8,25,0 diff --git a/test/data/Norse/assets-data.csv b/test/data/Norse/assets-data.csv index 780e42e..1ae7b66 100644 --- a/test/data/Norse/assets-data.csv +++ b/test/data/Norse/assets-data.csv @@ -1,4 +1,3 @@ -,{producer;consumer;storage;hub;conversion},{true;false},{true;false},{true;false},kEUR/MWh,kEUR/MW/year,MW,MW,MW,MW,MW/unit,MW/unit,MW,MW,MWh,MWh,h,kEUR/MW/year,year,0 name,type,active,investable,investment_integer,variable_cost,investment_cost,investment_limit,capacity,initial_capacity,peak_demand,charging_capacity,discharging_capacity,initial_charging_capacity,initial_discharging_capacity,initial_storage_capacity,initial_storage_level,energy_to_power_ratio,fixed_cost,lifetime,efficiency Asgard_Battery,storage,true,true,true,0.003,300,,10,0,0,1,1,0,0,0,,100,3,10,0 Asgard_Solar,producer,true,true,true,0.001,350,50000,100,0,0,0,0,0,0,0,0,0,8,25,0 diff --git a/test/data/Norse/assets-partitions.csv b/test/data/Norse/assets-partitions.csv index 754d3ec..a0a7ce3 100644 --- a/test/data/Norse/assets-partitions.csv +++ b/test/data/Norse/assets-partitions.csv @@ -1,4 +1,3 @@ -,,, asset,rep_period_id,specification,partition Asgard_Solar,1,uniform,4 Asgard_E_demand,1,explicit,7;7;7;21;21;21;21;21;21;21 diff --git a/test/data/Norse/assets-profiles.csv b/test/data/Norse/assets-profiles.csv index d4715c3..04162a6 100644 --- a/test/data/Norse/assets-profiles.csv +++ b/test/data/Norse/assets-profiles.csv @@ -1,4 +1,3 @@ -,,,p.u. asset,rep_period_id,time_step,value Asgard_Solar,1,1,0 Asgard_Solar,1,2,0 diff --git a/test/data/Norse/flows-data.csv b/test/data/Norse/flows-data.csv index 695f35a..3b3a680 100644 --- a/test/data/Norse/flows-data.csv +++ b/test/data/Norse/flows-data.csv @@ -1,4 +1,3 @@ -,asset_name,asset_name,{true;false},{true;false},{true;false},{true;false},kEUR/MWh,kEUR/MW/year,MW,MW,MW,MW,p.u. carrier,from_asset,to_asset,active,is_transport,investable,investment_integer,variable_cost,investment_cost,investment_limit,capacity,initial_export_capacity,initial_import_capacity,efficiency electricity,Asgard_Battery,Asgard_E_demand,true,false,false,false,0.003,0,0,0,0,0,0.95 electricity,Asgard_Solar,Asgard_Battery,true,false,false,false,0.001,0,0,0,0,0,0.95 diff --git a/test/data/Norse/flows-partitions.csv b/test/data/Norse/flows-partitions.csv index f23224c..8a3ee27 100644 --- a/test/data/Norse/flows-partitions.csv +++ b/test/data/Norse/flows-partitions.csv @@ -1,4 +1,3 @@ -,,,, from_asset,to_asset,rep_period_id,specification,partition Asgard_Solar,Asgard_Battery,2,math,4x3+3x4 Asgard_Solar,Asgard_E_demand,2,math,3x4+4x3 diff --git a/test/data/Norse/flows-profiles.csv b/test/data/Norse/flows-profiles.csv index e984b7d..8bdc8a4 100644 --- a/test/data/Norse/flows-profiles.csv +++ b/test/data/Norse/flows-profiles.csv @@ -1,4 +1,3 @@ -,,,,p.u. from_asset,to_asset,rep_period_id,time_step,value Asgard_E_demand,Valhalla_E_balance,1,1,0.95 Asgard_E_demand,Valhalla_E_balance,1,2,0.95 diff --git a/test/data/Norse/rep-periods-data.csv b/test/data/Norse/rep-periods-data.csv index ebb6e12..19651ff 100644 --- a/test/data/Norse/rep-periods-data.csv +++ b/test/data/Norse/rep-periods-data.csv @@ -1,4 +1,3 @@ -,,hours id,num_time_steps,resolution 1,168,1.0 2,24,1.0 diff --git a/test/data/Norse/rep-periods-mapping.csv b/test/data/Norse/rep-periods-mapping.csv index 484c12d..043583a 100644 --- a/test/data/Norse/rep-periods-mapping.csv +++ b/test/data/Norse/rep-periods-mapping.csv @@ -1,4 +1,3 @@ -,, period,rep_period,weight 1,2,1.0 2,2,1.0 diff --git a/test/test-pipeline.jl b/test/test-pipeline.jl index f6ef7c6..0a5fcee 100644 --- a/test/test-pipeline.jl +++ b/test/test-pipeline.jl @@ -67,7 +67,7 @@ end csv_copy = replace(csv_path, "data.csv" => "data-copy.csv") csv_fill = replace(csv_path, "data.csv" => "data-alt.csv") - df_org = DataFrame(CSV.File(csv_path; header = 2)) + df_org = DataFrame(CSV.File(csv_path)) @testset "CSV -> DataFrame" begin con = DBInterface.connect(DuckDB.DB) @@ -93,7 +93,7 @@ end @testset "CSV w/ alternatives -> DataFrame" begin con = DBInterface.connect(DuckDB.DB) df_res = TulipaIO.create_tbl(con, csv_path, csv_copy; opts..., fill = false) - df_exp = DataFrame(CSV.File(csv_copy; header = 2)) + df_exp = DataFrame(CSV.File(csv_copy)) @test df_exp.investable == df_res.investable @test df_org.investable != df_res.investable end @@ -101,7 +101,7 @@ end @testset "no filling for missing rows" begin con = DBInterface.connect(DuckDB.DB) df_res = TulipaIO.create_tbl(con, csv_path, csv_fill; opts..., fill = false) - df_ref = DataFrame(CSV.File(csv_fill; header = 2)) + df_ref = DataFrame(CSV.File(csv_fill)) # NOTE: row order is different, join to determine equality cmp = join_cmp(df_res, df_ref, ["name", "investable"]; on = :name) @test ( @@ -116,7 +116,7 @@ end @testset "back-filling missing rows" begin con = DBInterface.connect(DuckDB.DB) df_res = TulipaIO.create_tbl(con, csv_path, csv_fill; opts..., fill = true) - df_exp = DataFrame(CSV.File(csv_copy; header = 2)) + df_exp = DataFrame(CSV.File(csv_copy)) cmp = join_cmp(df_exp, df_res, ["name", "investable"]; on = :name) @test all(cmp.investable .== cmp.investable_1) @test (cmp.source .== "both") |> all @@ -132,7 +132,7 @@ end fill = true, fill_values = Dict(:investable => true), ) - df_ref = DataFrame(CSV.File(csv_fill; header = 2)) + df_ref = DataFrame(CSV.File(csv_fill)) cmp = join_cmp(df_res, df_ref, ["name", "investable"]; on = :name) @test (DataFrames.subset(cmp, :investable_1 => DataFrames.ByRow(ismissing)).investable) |> all @@ -186,7 +186,7 @@ end fill = false, ) df_res = DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name")) - df_exp = DataFrame(CSV.File(csv_copy; header = 2)) + df_exp = DataFrame(CSV.File(csv_copy)) @test df_exp.investable == df_res.investable @test df_org.investable != df_res.investable @@ -200,7 +200,7 @@ end fill = true, ) df_res = DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name")) - df_exp = DataFrame(CSV.File(csv_copy; header = 2)) + df_exp = DataFrame(CSV.File(csv_copy)) # NOTE: row order is different, join to determine equality cmp = join_cmp(df_exp, df_res, ["name", "investable"]; on = :name) @test all(cmp.investable .== cmp.investable_1) @@ -218,7 +218,7 @@ end fill_values = Dict(:investable => true), ) df_res = DataFrame(DBInterface.execute(con, "SELECT * FROM $tbl_name")) - df_ref = DataFrame(CSV.File(csv_fill; header = 2)) + df_ref = DataFrame(CSV.File(csv_fill)) cmp = join_cmp(df_res, df_ref, ["name", "investable"]; on = :name) @test ( DataFrames.subset(cmp, :investable_1 => DataFrames.ByRow(ismissing)).investable @@ -232,12 +232,12 @@ end csv_copy = replace(csv_path, "data.csv" => "data-copy.csv") csv_fill = replace(csv_path, "data.csv" => "data-alt.csv") - df_org = DataFrame(CSV.File(csv_path; header = 2)) + df_org = DataFrame(CSV.File(csv_path)) opts = Dict(:on => :name, :name => "dummy", :show => true) @testset "w/ vector" begin con = DBInterface.connect(DuckDB.DB) - df_exp = DataFrame(CSV.File(csv_copy; header = 2)) + df_exp = DataFrame(CSV.File(csv_copy)) df_res = TulipaIO.create_tbl(con, csv_path, Dict(:investable => df_exp.investable); opts...) # NOTE: row order is different, join to determine equality cmp = join_cmp(df_exp, df_res, ["name", "investable"]; on = :name)