Skip to content

Commit

Permalink
Less verbose OCEL2 XML import + Release v0.2.4
Browse files Browse the repository at this point in the history
  • Loading branch information
aarkue committed Jul 8, 2024
1 parent 2c26ff4 commit 6531bd5
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 29 deletions.
19 changes: 5 additions & 14 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rustxes"
version = "0.2.3"
version = "0.2.4"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
Expand All @@ -11,13 +11,13 @@ crate-type = ["cdylib"]
[dependencies]
pyo3 = "0.20.0"
rayon = "1.7.0"
quick-xml = "0.30.0"
quick-xml = "0.31.0"
flate2 = "1.0"
polars = {version = "0.38.1", features = ["dtype-slim","lazy","timezones","rows","diagonal_concat"]}
pyo3-polars = "0.12.0"
serde_json = "1.0.105"
serde = {version = "1.0.188", features = ["derive"]}
chrono = {version = "0.4.29", features = ["serde"] }
uuid = {version = "1.4.1", features = ["v4", "serde"]}
process_mining = "0.3.13"
process_mining = "0.3.14"
# process_mining = {path = "../../rust-bridge-process-mining/process_mining"}
33 changes: 28 additions & 5 deletions example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -12,17 +12,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"ocel_xml_path = \"/home/aarkue/doc/projects/rust-bridge-process-mining/process_mining/src/event_log/tests/test_data/order-management.xml\"\n",
"ocel_xml_path = \"test_data/ContainerLogistics.xml\"\n",
"\n",
"\n",
"ocel_rs = rustxes.import_ocel_xml_pm4py(ocel_xml_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -31,6 +33,27 @@
"ocel_py = pm4py.read_ocel2_xml(ocel_xml_path)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"13910\n",
"13882\n",
"False\n"
]
}
],
"source": [
"print(len(ocel_rs.objects))\n",
"print(len(ocel_py.objects))\n",
"print(len(ocel_rs.objects) == len(ocel_py.objects))"
]
},
{
"cell_type": "code",
"execution_count": null,
Expand Down Expand Up @@ -353,7 +376,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
"version": "3.12.4"
},
"orig_nbformat": 4
},
Expand Down
29 changes: 22 additions & 7 deletions src/ocel/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,21 @@
use std::collections::{HashMap, HashSet};
use std::{
collections::{HashMap, HashSet},
io::BufReader,
};

use chrono::DateTime;
use polars::{prelude::*, series::Series};
use process_mining::{
import_ocel_json_from_path, import_ocel_xml_file, ocel::ocel_struct::OCELAttributeValue, OCEL,
import_ocel_json_from_path, import_ocel_xml, import_ocel_xml_file,
ocel::{
ocel_struct::OCELAttributeValue,
xml_ocel_import::{import_ocel_xml_file_with, OCELImportOptions},
},
OCEL,
};
use pyo3::{pyfunction, PyResult};
use pyo3_polars::PyDataFrame;
use quick_xml::Reader;

fn ocel_attribute_val_to_any_value<'a>(
val: &'a OCELAttributeValue,
Expand Down Expand Up @@ -189,10 +198,10 @@ pub fn ocel2_to_df(ocel: &OCEL) -> OCEL2DataFrames {
if let Some(obj_type) = obj_id_to_type_map.get(&r.object_id) {
AnyValue::StringOwned((*obj_type).into())
} else {
eprintln!(
"Invalid object id in E2O reference: Event: {}, Object: {}",
_e.id, r.object_id
);
// eprintln!(
// "Invalid object id in E2O reference: Event: {}, Object: {}",
// _e.id, r.object_id
// );
AnyValue::Null
}
})
Expand Down Expand Up @@ -443,7 +452,13 @@ pub fn ocel_dfs_to_py(ocel_dfs: OCEL2DataFrames) -> HashMap<String, PyDataFrame>

/// Python-exposed function that imports an OCEL XML file and returns its DataFrames.
///
/// Reads the file at `path`, converts the resulting `OCEL` with `ocel2_to_df`, and
/// wraps the DataFrames for Python with `ocel_dfs_to_py`.
///
/// Returns a map from `String` keys to `PyDataFrame` values (presumably one entry
/// per OCEL table; confirm against `ocel_dfs_to_py`). This function always returns
/// `Ok`; import failures are not converted into a Python error here.
#[pyfunction]
pub fn import_ocel_xml_rs(path: String) -> PyResult<HashMap<String, PyDataFrame>> {
// NOTE(review): this span appears to come from a rendered diff; the next line looks
// like the pre-change call, and its binding is shadowed by the one that follows.
let ocel = import_ocel_xml_file(&path);
// Import with the `verbose` option switched off; all other options keep their defaults.
let ocel = import_ocel_xml_file_with(
&path,
OCELImportOptions {
verbose: false,
..Default::default()
},
);
let ocel_dfs = ocel2_to_df(&ocel);
Ok(ocel_dfs_to_py(ocel_dfs))
}
Expand Down
20 changes: 20 additions & 0 deletions src/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,26 @@ mod xes_tests {
// );
// }

/// Imports the bundled ContainerLogistics OCEL XML log, converts it to DataFrames,
/// prints the resulting shapes together with the elapsed time, and checks the
/// number of objects and events in the imported log.
#[test]
fn test_ocel2_container_df() {
    let start = Instant::now();
    // The log is embedded into the test binary at compile time via `include_bytes!`.
    let ocel = import_ocel_xml_slice(include_bytes!("../test_data/ContainerLogistics.xml"));
    let dfs = ocel2_to_df(&ocel);

    // Gather the reported values in the same order the message lists them.
    let objects_shape = dfs.objects.shape();
    let elapsed = start.elapsed();
    let changes_shape = dfs.object_changes.shape();
    let o2o_shape = dfs.o2o.shape();
    let e2o_shape = dfs.e2o.shape();
    println!(
        "Got OCEL DF with {:?} objects in {:?}; Object change shape: {:?}; O2O shape: {:?}; E2O shape: {:?}",
        objects_shape, elapsed, changes_shape, o2o_shape, e2o_shape
    );

    // Expected sizes of the ContainerLogistics log.
    assert_eq!(ocel.objects.len(), 13910);
    assert_eq!(ocel.events.len(), 35413);
}

#[test]
fn test_ocel2_df() {
let now = Instant::now();
Expand Down

0 comments on commit 6531bd5

Please sign in to comment.