Create model schema from a JSON file (#1054)
* Remove prettier from the pre-commit
* Add JSON schema file
* Add JSON package to the dependencies
datejada authored Mar 5, 2025
1 parent aefad2e commit ae662fe
Showing 7 changed files with 882 additions and 245 deletions.
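For orientation before the per-file diffs, here is a minimal sketch of what one table entry in the new `src/input-schemas.json` could look like. The nesting (table name, then column name, then field info) and the field keys (`description`, `type`, `unit_of_measure`, `default`, `constraints`) are taken from the code this commit adds; the concrete values below are illustrative assumptions, not the file's actual contents:

```json
{
  "asset": {
    "asset": {
      "description": "Unique name of the asset.",
      "type": "VARCHAR",
      "unit_of_measure": null,
      "default": null,
      "constraints": { "not_null": true }
    },
    "capacity": {
      "description": "Capacity of one unit of the asset.",
      "type": "DOUBLE",
      "unit_of_measure": "MW",
      "default": 0,
      "constraints": { "minimum": 0 }
    }
  }
}
```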
6 changes: 0 additions & 6 deletions .pre-commit-config.yaml
@@ -37,12 +37,6 @@ repos:
     rev: 054bda51dbe278b3e86f27c890e3f3ac877d616c
     hooks:
       - id: validate-cff
-  - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: "v4.0.0-alpha.8" # Use the sha or tag you want to point at
-    hooks:
-      - id: prettier
-        types_or: [yaml, json]
-        exclude: ".copier-answers.yml"
   - repo: https://github.com/adrienverge/yamllint
     rev: v1.35.1
     hooks:
2 changes: 2 additions & 0 deletions Project.toml
@@ -8,6 +8,7 @@ CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 DuckDB = "d2f5444f-75bc-4fdf-ac35-56f514c445e1"
 DuckDB_jll = "2cbbab25-fc8b-58cf-88d4-687a02676033"
 HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
+JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
@@ -23,6 +24,7 @@ CSV = "0.10"
 DuckDB = "0.10, ~1.0"
 DuckDB_jll = "0.10, ~1.0"
 HiGHS = "1"
+JSON = "0.21.4"
 JuMP = "1"
 MathOptInterface = "1"
 OrderedCollections = "1"
2 changes: 2 additions & 0 deletions docs/Project.toml
@@ -5,7 +5,9 @@ Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
 DuckDB = "d2f5444f-75bc-4fdf-ac35-56f514c445e1"
 GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6"
 HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b"
+JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 LiveServer = "16fef848-5104-11e9-1b77-fb7a48bbb589"
+OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 TulipaEnergyModel = "5d7bd171-d18e-45a5-9111-f1f11ac5d04d"
 TulipaIO = "7b3808b7-0819-42d4-885c-978ba173db11"
64 changes: 53 additions & 11 deletions docs/src/50-schemas.md
@@ -2,7 +2,7 @@

 The optimization model parameters with the input data must follow the schema below for each table. To create these tables we currently use CSV files that follow this same schema and then convert them into tables using TulipaIO, as shown in the basic example of the [Tutorials](@ref basic-example) section.

-The schemas can be accessed at any time after loading the package by typing `TulipaEnergyModel.schema_per_table_name` in the Julia console. Here is the complete list of model parameters in the schemas per table (or CSV file):
+The schemas can be found in the `input-schemas.json`. For more advanced users, they can also access the schemas at any time after loading the package by typing `TulipaEnergyModel.schema_per_table_name` in the Julia console. Here is the complete list of model parameters in the schemas per table (or CSV file):

 !!! info "Optional tables/files and their defaults"
     The following tables/files are allowed to be missing: "assets\_rep\_periods\_partitions", "assets\_timeframe\_partitions", "assets\_timeframe\_profiles", "flows\_rep\_periods\_partitions", "group\_asset", "profiles\_timeframe".
@@ -11,14 +11,56 @@
     - If no group table/file is available there will be no group constraints in the model

 ```@eval
-using Markdown, TulipaEnergyModel
-Markdown.parse(
-    join(["- **`$filename`**\n" *
-        join(
-            [" - `$f: $t`" for (f, t) in schema],
-            "\n",
-        ) for (filename, schema) in TulipaEnergyModel.schema_per_table_name
-    ] |> sort, "\n")
-)
+"""
+The output of the following code is a Markdown text with the following structure:
+
+TABLE_NAME
+=========
+
+PARAMETER_NAME
+
+• Description: Lorem ipsum
+• Type: SQL type of the parameter
+• Default: a value or "No default"
+• Unit of measure: a value or "No unit"
+• Constraints: a table or "No constraints"
+"""
+
+using Markdown, JSON
+using OrderedCollections: OrderedDict
+
+input_schemas = JSON.parsefile("../../src/input-schemas.json"; dicttype = OrderedDict)
+
+let buffer = IOBuffer()
+    for (i, (table_name, fields)) in enumerate(input_schemas)
+        write(buffer, "## Table $i : `$table_name`\n\n")
+        for (field_name, field_info) in fields
+            _description = get(field_info, "description", "No description provided")
+            _type = get(field_info, "type", "Unknown type")
+            _unit = get(field_info, "unit_of_measure", "No unit")
+            _default = get(field_info, "default", "No default")
+            _constraints_values = get(field_info, "constraints", nothing)
+            write(buffer, "**`$field_name`**\n\n")
+            write(buffer, "- Description: $_description\n\n")
+            write(buffer, "- Type: `$_type`\n")
+            write(buffer, "- Unit of measure: `$_unit`\n")
+            write(buffer, "- Default: `$_default`\n")
+            if _constraints_values === nothing
+                write(buffer, "- Constraints: No constraints\n")
+            elseif isa(_constraints_values, OrderedDict)
+                write(buffer, "| Constraints | Value |\n| --- | --- |\n")
+                for (key, value) in _constraints_values
+                    write(buffer, "| $key | `$value` |\n")
+                end
+                write(buffer, "\n")
+            else
+                write(buffer, "- Constraints: `$(string(_constraints_values))`\n")
+            end
+        end
+    end
+
+    Markdown.parse(String(take!(buffer)))
+end
 ```
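Since the docs page now parses the JSON file directly, the same schema file can be inspected by hand. A minimal sketch, assuming the repository root as working directory and an `asset` table as in the list above:

```julia
using JSON
using OrderedCollections: OrderedDict

# Parse the schema file into ordered dictionaries, as the docs code above does.
schemas = JSON.parsefile("src/input-schemas.json"; dicttype = OrderedDict)

# Print each column of the "asset" table together with its declared SQL type.
for (field, info) in schemas["asset"]
    println(rpad(field, 40), get(info, "type", "Unknown type"))
end
```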
1 change: 1 addition & 0 deletions src/TulipaEnergyModel.jl
@@ -8,6 +8,7 @@ const SQL_FOLDER = joinpath(@__DIR__, "sql")
 using CSV: CSV
 using DuckDB: DuckDB, DBInterface
 using TOML: TOML
+using JSON: JSON
 using TulipaIO: TulipaIO

 ## Optimization
232 changes: 4 additions & 228 deletions src/input-schemas.jl
@@ -1,232 +1,8 @@
-# At the end of the file, there is a reference relating schemas and files
+# read schema from file

-const schemas = (
-    assets = (
-        # Schema for asset.csv
-        basic = (
-            :asset => "VARCHAR", # Name of Asset (geographical?)
-            :type => "VARCHAR", # Producer/Consumer/Storage/Conversion
-            :group => "VARCHAR", # Group to which the asset belongs to (missing -> no group)
-            :capacity => "DOUBLE", # MW
-            :min_operating_point => "DOUBLE", # Minimum operating point or minimum stable generation level defined as a portion of the capacity of asset [p.u.]
-            :investment_method => "VARCHAR", # Which method of investment (simple/compact)
-            :investment_integer => "BOOLEAN", # Whether investment is integer or continuous
-            :technical_lifetime => "INTEGER", # years
-            :economic_lifetime => "INTEGER", # years
-            :discount_rate => "DOUBLE", # p.u.
-            :consumer_balance_sense => "VARCHAR", # Sense of the consumer balance constraint (default ==)
-            :capacity_storage_energy => "DOUBLE", # MWh
-            :is_seasonal => "BOOLEAN", # Whether seasonal storage (e.g. hydro) or not (e.g. battery)
-            :use_binary_storage_method => "VARCHAR", # Whether to use an extra binary variable for the storage assets to avoid charging and discharging simultaneously (missing;binary;relaxed_binary)
-            :unit_commitment => "BOOLEAN", # Whether asset has unit commitment constraints
-            :unit_commitment_method => "VARCHAR", # Which unit commitment method to use (i.e., basic)
-            :unit_commitment_integer => "BOOLEAN", # Whether the unit commitment variables are integer or not
-            :ramping => "BOOLEAN", # Whether asset has ramping constraints
-            :storage_method_energy => "BOOLEAN", # Whether storage method is energy or not (i.e., fixed_ratio)
-            :energy_to_power_ratio => "DOUBLE", # Hours
-            :investment_integer_storage_energy => "BOOLEAN", # Whether investment for storage energy is integer or continuous
-            :max_ramp_up => "DOUBLE", # Maximum ramping up rate as a portion of the capacity of asset [p.u./h]
-            :max_ramp_down => "DOUBLE", # Maximum ramping down rate as a portion of the capacity of asset [p.u./h]
-        ),
-
-        # Schema for asset-milestone.csv
-        milestone = OrderedDict(
-            :asset => "VARCHAR",
-            :milestone_year => "INTEGER",
-            :investable => "BOOLEAN", # Whether able to invest
-            :peak_demand => "DOUBLE", # MW
-            :storage_inflows => "DOUBLE", # MWh/year
-            :initial_storage_level => "DOUBLE", # MWh (Missing -> free initial level)
-            :min_energy_timeframe_partition => "DOUBLE", # MWh (Missing -> no limit)
-            :max_energy_timeframe_partition => "DOUBLE", # MWh (Missing -> no limit)
-            :units_on_cost => "DOUBLE", # Objective function coefficient on `units_on` variable. e.g., no-load cost or idling cost
-        ),
-
-        # Schema for the asset-commission.csv
-        commission = OrderedDict(
-            :asset => "VARCHAR",
-            :commission_year => "INTEGER", # Year of commissioning
-            :fixed_cost => "DOUBLE", # kEUR/MW/year
-            :investment_cost => "DOUBLE", # kEUR/MW
-            :investment_limit => "DOUBLE", # MWh (Missing -> no limit)
-            :fixed_cost_storage_energy => "DOUBLE", # kEUR/MWh/year
-            :investment_cost_storage_energy => "DOUBLE", # kEUR/MWh
-            :investment_limit_storage_energy => "DOUBLE", # MWh (Missing -> no limit)
-        ),
-
-        # Schema for the asset-both.csv file.
-        both = OrderedDict(
-            :asset => "VARCHAR", # Name of Asset (geographical?)
-            :milestone_year => "INTEGER", # Year
-            :commission_year => "INTEGER", # Year of commissioning
-            :decommissionable => "BOOLEAN",
-            :initial_units => "DOUBLE", # units
-            :initial_storage_units => "DOUBLE", # units
-        ),
-
-        # Schema for the assets-profiles.csv and assets-timeframe-profiles.csv file.
-        profiles_reference = OrderedDict(
-            :asset => "VARCHAR", # Asset name
-            :commission_year => "INTEGER",
-            :profile_type => "VARCHAR", # Type of profile, used to determine source profile
-            :profile_name => "VARCHAR", # Name of profile, used to match with the profiles_data
-        ),
-
-        # Schema for the assets-timeframe-partitions.csv file.
-        timeframe_partition = OrderedDict(
-            :asset => "VARCHAR",
-            :year => "INTEGER",
-            :specification => "VARCHAR",
-            :partition => "VARCHAR",
-        ),
-
-        # Schema for the assets-rep-periods-partitions.csv file.
-        rep_periods_partition = OrderedDict(
-            :asset => "VARCHAR",
-            :year => "INTEGER",
-            :rep_period => "INTEGER",
-            :specification => "VARCHAR",
-            :partition => "VARCHAR",
-        ),
-    ),
-    groups = (
-        # Schema for the group-asset.csv file.
-        data = OrderedDict(
-            :name => "VARCHAR", # Name of the Group
-            :milestone_year => "INTEGER",
-            :invest_method => "BOOLEAN", # true -> activate group constraints; false -> no group investment constraints
-            :min_investment_limit => "DOUBLE", # MW (Missing -> no limit)
-            :max_investment_limit => "DOUBLE", # MW (Missing -> no limit)
-        ),
-    ),
-    flows = (
-        # Schema for flow.csv
-        basic = (
-            :from_asset => "VARCHAR", # Name of Asset
-            :to_asset => "VARCHAR", # Name of Asset
-            :carrier => "VARCHAR", # (Optional?) Energy carrier
-            :is_transport => "BOOLEAN", # Whether a transport flow
-            :capacity => "DOUBLE",
-            :technical_lifetime => "INTEGER",
-            :economic_lifetime => "INTEGER",
-            :discount_rate => "DOUBLE",
-            :investment_integer => "BOOLEAN", # Whether investment is integer or continuous
-        ),
-
-        # Schema for flow-milestone.csv
-        milestone = OrderedDict(
-            :from_asset => "VARCHAR", # Name of Asset
-            :to_asset => "VARCHAR", # Name of Asset
-            :milestone_year => "INTEGER", # Year of commissioning
-            :investable => "BOOLEAN", # Whether able to invest
-            :variable_cost => "DOUBLE", # kEUR/MWh
-        ),
-
-        # Schema for the flow-commission.csv
-        commission = OrderedDict(
-            :from_asset => "VARCHAR", # Name of Asset
-            :to_asset => "VARCHAR", # Name of Asset
-            :commission_year => "INTEGER", # Year of commissioning
-            :fixed_cost => "DOUBLE", # kEUR/MWh/year
-            :investment_cost => "DOUBLE", # kEUR/MW
-            :efficiency => "DOUBLE", # p.u. (per unit)
-            :investment_limit => "DOUBLE", # MW
-        ),
-
-        # Schema for the flow-both.csv file.
-        both = OrderedDict(
-            :from_asset => "VARCHAR", # Name of Asset
-            :to_asset => "VARCHAR", # Name of Asset
-            :milestone_year => "INTEGER",
-            :commission_year => "INTEGER", # Year of commissioning
-            :decommissionable => "BOOLEAN",
-            :initial_export_units => "DOUBLE", # MW
-            :initial_import_units => "DOUBLE", # MW
-        ),
-
-        # Schema for the flows-profiles file.
-        profiles_reference = OrderedDict(
-            :from_asset => "VARCHAR", # Name of Asset
-            :to_asset => "VARCHAR", # Name of Asset
-            :year => "INTEGER",
-            :profile_type => "VARCHAR", # Type of profile, used to determine source profile
-            :profile_name => "VARCHAR", # Name of profile, used to match with the profiles_data
-        ),
-
-        # Schema for the flows-rep-periods-partitions.csv file.
-        rep_periods_partition = OrderedDict(
-            :from_asset => "VARCHAR", # Name of Asset
-            :to_asset => "VARCHAR", # Name of Asset
-            :year => "INTEGER",
-            :rep_period => "INTEGER",
-            :specification => "VARCHAR",
-            :partition => "VARCHAR",
-        ),
-    ),
-    year = (
-        # Schema for year-data.csv
-        data = (
-            :year => "INTEGER", # Unique identifier (currently, the year itself)
-            :length => "INTEGER",
-            :is_milestone => "BOOLEAN", # Whether the year is a milestone year of a vintage year
-        ),
-    ),
-    timeframe = (
-        # Schema for the profiles-timeframe-<type>.csv file.
-        profiles_data = OrderedDict(
-            :profile_name => "VARCHAR", # Profile name
-            :year => "INTEGER",
-            :period => "INTEGER", # Period
-            :value => "DOUBLE", # p.u. (per unit)
-        ),
-    ),
-    rep_periods = (
-        # Schema for the rep-periods-data.csv file.
-        data = OrderedDict(
-            :year => "INTEGER",
-            :rep_period => "INTEGER", # Representative period number
-            :num_timesteps => "INTEGER", # Numer of timesteps
-            :resolution => "DOUBLE", # Duration of each timestep (hours)
-        ),
-
-        # Schema for the rep-periods-mapping.csv file.
-        mapping = OrderedDict(
-            :year => "INTEGER",
-            :period => "INTEGER", # Period number
-            :rep_period => "INTEGER", # Representative period number
-            :weight => "DOUBLE", # Hours
-        ),
-
-        # Schema for the profiles-rep-periods-<type>.csv file.
-        profiles_data = OrderedDict(
-            :profile_name => "VARCHAR", # Profile name
-            :year => "INTEGER",
-            :rep_period => "INTEGER", # Representative period number
-            :timestep => "INTEGER", # Timestep number
-            :value => "DOUBLE", # p.u. (per unit)
-        ),
-    ),
-)
+schema = JSON.parsefile("src/input-schemas.json"; dicttype = OrderedDict);

 const schema_per_table_name = OrderedDict(
-    "asset" => schemas.assets.basic,
-    "asset_both" => schemas.assets.both,
-    "asset_commission" => schemas.assets.commission,
-    "asset_milestone" => schemas.assets.milestone,
-    "assets_profiles" => schemas.assets.profiles_reference,
-    "assets_rep_periods_partitions" => schemas.assets.rep_periods_partition,
-    "assets_timeframe_partitions" => schemas.assets.timeframe_partition,
-    "assets_timeframe_profiles" => schemas.assets.profiles_reference,
-    "flow" => schemas.flows.basic,
-    "flow_both" => schemas.flows.both,
-    "flow_commission" => schemas.flows.commission,
-    "flow_milestone" => schemas.flows.milestone,
-    "flows_profiles" => schemas.flows.profiles_reference,
-    "flows_rep_periods_partitions" => schemas.flows.rep_periods_partition,
-    "group_asset" => schemas.groups.data,
-    "profiles_rep_periods" => schemas.rep_periods.profiles_data,
-    "profiles_timeframe" => schemas.timeframe.profiles_data,
-    "rep_periods_data" => schemas.rep_periods.data,
-    "rep_periods_mapping" => schemas.rep_periods.mapping,
-    "year_data" => schemas.year.data,
+    schema_key => OrderedDict(key => value["type"] for (key, value) in schema_content) for
+    (schema_key, schema_content) in schema
 )
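To make the new comprehension concrete, here is a self-contained sketch of how `schema_per_table_name` reduces every parsed table to a plain `column => SQL type` mapping; the two-column `schema` stand-in below is hypothetical:

```julia
using OrderedCollections: OrderedDict

# Hypothetical stand-in for JSON.parsefile("src/input-schemas.json"; dicttype = OrderedDict).
schema = OrderedDict(
    "asset" => OrderedDict(
        "asset" => OrderedDict("type" => "VARCHAR", "description" => "Name of the asset"),
        "capacity" => OrderedDict("type" => "DOUBLE", "unit_of_measure" => "MW"),
    ),
)

# The same comprehension as in the diff above: keep only each column's "type" entry.
schema_per_table_name = OrderedDict(
    schema_key => OrderedDict(key => value["type"] for (key, value) in schema_content) for
    (schema_key, schema_content) in schema
)

@assert schema_per_table_name["asset"] ==
        OrderedDict("asset" => "VARCHAR", "capacity" => "DOUBLE")
```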
820 changes: 820 additions & 0 deletions src/input-schemas.json
Large diffs are not rendered by default.
