diff --git a/Cargo.lock b/Cargo.lock index 06c34d0..ecd086f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1075,13 +1075,13 @@ dependencies = [ [[package]] name = "process_mining" -version = "0.3.13" +version = "0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df4483004c273c7c3266b524cadc31d286763d47f4a98630c1d786983eb8737d" +checksum = "086541d8b6dbc983c3349439c8c249c307c211276e424c0f538b06126e21d288" dependencies = [ "chrono", "flate2", - "quick-xml 0.31.0", + "quick-xml", "rayon", "serde", "serde_json", @@ -1163,15 +1163,6 @@ dependencies = [ "thiserror", ] -[[package]] -name = "quick-xml" -version = "0.30.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" -dependencies = [ - "memchr", -] - [[package]] name = "quick-xml" version = "0.31.0" @@ -1305,7 +1296,7 @@ checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" [[package]] name = "rustxes" -version = "0.2.3" +version = "0.2.4" dependencies = [ "chrono", "flate2", @@ -1313,7 +1304,7 @@ dependencies = [ "process_mining", "pyo3", "pyo3-polars", - "quick-xml 0.30.0", + "quick-xml", "rayon", "serde", "serde_json", diff --git a/Cargo.toml b/Cargo.toml index b153a75..87397dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rustxes" -version = "0.2.3" +version = "0.2.4" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -11,7 +11,7 @@ crate-type = ["cdylib"] [dependencies] pyo3 = "0.20.0" rayon = "1.7.0" -quick-xml = "0.30.0" +quick-xml = "0.31.0" flate2 = "1.0" polars = {version = "0.38.1", features = ["dtype-slim","lazy","timezones","rows","diagonal_concat"]} pyo3-polars = "0.12.0" @@ -19,5 +19,5 @@ serde_json = "1.0.105" serde = {version = "1.0.188", features = ["derive"]} chrono = {version = "0.4.29", features = ["serde"] } uuid = {version = "1.4.1", features = ["v4", "serde"]} -process_mining = "0.3.13" +process_mining = "0.3.14" # process_mining = {path = "../../rust-bridge-process-mining/process_mining"} diff --git a/example.ipynb b/example.ipynb index 5a95c61..1e794ab 100644 --- a/example.ipynb +++ b/example.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -12,17 +12,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "ocel_xml_path = \"/home/aarkue/doc/projects/rust-bridge-process-mining/process_mining/src/event_log/tests/test_data/order-management.xml\"\n", + "ocel_xml_path = \"test_data/ContainerLogistics.xml\"\n", + "\n", + "\n", "ocel_rs = rustxes.import_ocel_xml_pm4py(ocel_xml_path)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -31,6 +33,27 @@ "ocel_py = pm4py.read_ocel2_xml(ocel_xml_path)" ] }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13910\n", + "13882\n", + "False\n" + ] + } + ], + "source": [ + "print(len(ocel_rs.objects))\n", + "print(len(ocel_py.objects))\n", + "print(len(ocel_rs.objects) == len(ocel_py.objects))" + ] + }, { "cell_type": "code", "execution_count": null, @@ -353,7 +376,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.7" + "version": "3.12.4" }, "orig_nbformat": 4 }, diff --git a/src/ocel/mod.rs b/src/ocel/mod.rs index ada6457..57db16b 100644 --- a/src/ocel/mod.rs +++ b/src/ocel/mod.rs @@ -1,12 +1,21 @@ -use std::collections::{HashMap, HashSet}; +use std::{ + collections::{HashMap, HashSet}, + io::BufReader, +}; use chrono::DateTime; use polars::{prelude::*, series::Series}; use process_mining::{ - import_ocel_json_from_path, import_ocel_xml_file, ocel::ocel_struct::OCELAttributeValue, OCEL, + import_ocel_json_from_path, import_ocel_xml, import_ocel_xml_file, + ocel::{ + ocel_struct::OCELAttributeValue, + xml_ocel_import::{import_ocel_xml_file_with, OCELImportOptions}, + }, + OCEL, }; use pyo3::{pyfunction, PyResult}; use pyo3_polars::PyDataFrame; +use quick_xml::Reader; fn ocel_attribute_val_to_any_value<'a>( val: &'a OCELAttributeValue, @@ -189,10 +198,10 @@ pub fn ocel2_to_df(ocel: &OCEL) -> OCEL2DataFrames { if let Some(obj_type) = obj_id_to_type_map.get(&r.object_id) { AnyValue::StringOwned((*obj_type).into()) } else { - eprintln!( - "Invalid object id in E2O reference: Event: {}, Object: {}", - _e.id, r.object_id - ); + // eprintln!( + // "Invalid object id in E2O reference: Event: {}, Object: {}", + // _e.id, r.object_id + // ); AnyValue::Null } }) @@ -443,7 +452,13 @@ pub fn ocel_dfs_to_py(ocel_dfs: OCEL2DataFrames) -> HashMap #[pyfunction] pub fn import_ocel_xml_rs(path: String) -> PyResult> { - let ocel = import_ocel_xml_file(&path); + let ocel = import_ocel_xml_file_with( + &path, + OCELImportOptions { + verbose: false, + ..Default::default() + }, + ); let ocel_dfs = ocel2_to_df(&ocel); Ok(ocel_dfs_to_py(ocel_dfs)) } diff --git a/src/test.rs b/src/test.rs index 9c9e1b2..016ebb4 100644 --- a/src/test.rs +++ b/src/test.rs @@ -67,6 +67,26 @@ mod xes_tests { // ); // } + #[test] + fn test_ocel2_container_df() { + let now = Instant::now(); + let ocel_bytes = include_bytes!("../test_data/ContainerLogistics.xml"); + let ocel = import_ocel_xml_slice(ocel_bytes); + let ocel_dfs = ocel2_to_df(&ocel); + println!( + "Got OCEL DF with {:?} objects in {:?}; Object change shape: {:?}; O2O shape: {:?}; E2O shape: {:?}", + ocel_dfs.objects.shape(), + now.elapsed(), + ocel_dfs.object_changes.shape(), + ocel_dfs.o2o.shape(), + ocel_dfs.e2o.shape() + ); + assert_eq!(ocel.objects.len(), 13910); + assert_eq!(ocel.events.len(), 35413); + + + } + #[test] fn test_ocel2_df() { let now = Instant::now();