Skip to content

Commit

Permalink
(wip): refactor parser selector into a format file
Browse files Browse the repository at this point in the history
  • Loading branch information
piotr-roslaniec committed Mar 5, 2025
1 parent c34ae7b commit 53070ce
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 25 deletions.
36 changes: 11 additions & 25 deletions web-prover-core/src/parser/config.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
use std::fmt::Display;

use serde::{Deserialize, Serialize};
use serde_json::Value;

use crate::parser::{
errors::ExtractorError,
extractors::{extract_html, extract_json, get_value_type, ExtractionResult, Extractor},
extractors::{ExtractionResult, Extractor},
format::{FormatExtractor, HtmlExtractor, JsonExtractor},
};

/// The format of the data to extract from
Expand All @@ -27,31 +30,14 @@ pub struct ExtractorConfig {
}

// NOTE(review): this span looks like a rendered diff of `extract_and_validate`, so both an
// inline `match` body and a `FormatExtractor`-based body appear below — confirm against the
// repository which of the two is live before relying on either.
impl ExtractorConfig {
/// Extracts and validates data using this extractor configuration
pub fn extract_and_validate(&self, data: &Value) -> Result<ExtractionResult, ExtractorError> {
// Inline variant: check the JSON type of `data` per format, then call the matching
// `extract_json` / `extract_html` function, or report a `TypeMismatch`.
match self.format {
DataFormat::Json => {
// For JSON, we expect the data to be an object or array
match data {
Value::Object(_) | Value::Array(_) => extract_json(data, self),
_ => Err(ExtractorError::TypeMismatch {
expected: "object or array".to_string(),
actual: get_value_type(data).to_string(),
}),
}
},
DataFormat::Html => {
// For HTML, we expect the data to be a string value
if let Value::String(html_str) = data {
extract_html(html_str, self)
} else {
Err(ExtractorError::TypeMismatch {
expected: "string".to_string(),
actual: get_value_type(data).to_string(),
})
}
},
}
// Trait-object variant: pick the extractor that corresponds to the configured format.
let extractor: Box<dyn FormatExtractor> = match self.format {
DataFormat::Json => Box::new(JsonExtractor),
DataFormat::Html => Box::new(HtmlExtractor),
};

// Validate first; `?` returns the validation error without attempting extraction.
extractor.validate_input(data)?;
extractor.extract(data, self)
}
}

Expand Down
59 changes: 59 additions & 0 deletions web-prover-core/src/parser/format.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
use serde_json::Value;

use crate::parser::{
extractors::{extract_html, extract_json, get_value_type},
ExtractionResult, ExtractorConfig, ExtractorError,
};

/// Trait for data format extractors
///
/// An implementor represents one input format. It can check that a JSON value has the
/// type that format expects and run an extraction against it using an `ExtractorConfig`.
pub trait FormatExtractor {
/// Checks whether `data` has a JSON type this format can work with.
///
/// Returns `Ok(())` when it does, and an `ExtractorError` otherwise.
fn validate_input(&self, data: &Value) -> Result<(), ExtractorError>;
/// Runs the extraction on `data` as directed by `config`.
///
/// Returns the `ExtractionResult` on success, or an `ExtractorError` on failure.
fn extract(
&self,
data: &Value,
config: &ExtractorConfig,
) -> Result<ExtractionResult, ExtractorError>;
}

/// Marker type selecting the JSON format; its `FormatExtractor` impl accepts JSON objects
/// and arrays.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct JsonExtractor;

/// Marker type selecting the HTML format; its `FormatExtractor` impl accepts JSON string
/// values holding the HTML text.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct HtmlExtractor;

impl FormatExtractor for JsonExtractor {
  /// Accepts only JSON objects and arrays.
  ///
  /// Any other value yields `ExtractorError::TypeMismatch`, with `actual` set to the
  /// name reported by `get_value_type`.
  fn validate_input(&self, data: &Value) -> Result<(), ExtractorError> {
    match data {
      Value::Object(_) | Value::Array(_) => Ok(()),
      other => Err(ExtractorError::TypeMismatch {
        expected: "object or array".to_string(),
        actual: get_value_type(other).to_string(),
      }),
    }
  }

  /// Delegates to `extract_json` with the value and configuration unchanged.
  fn extract(
    &self,
    data: &Value,
    config: &ExtractorConfig,
  ) -> Result<ExtractionResult, ExtractorError> {
    let outcome = extract_json(data, config);
    outcome
  }
}

/// Builds the `TypeMismatch` error reported when HTML input is not a JSON string.
fn html_type_mismatch(data: &Value) -> ExtractorError {
  ExtractorError::TypeMismatch {
    expected: "string".to_string(),
    actual: get_value_type(data).to_string(),
  }
}

impl FormatExtractor for HtmlExtractor {
  /// Accepts only JSON string values (the HTML text).
  ///
  /// Any other value yields `ExtractorError::TypeMismatch` with `expected: "string"`.
  fn validate_input(&self, data: &Value) -> Result<(), ExtractorError> {
    match data {
      Value::String(_) => Ok(()),
      other => Err(html_type_mismatch(other)),
    }
  }

  /// Runs `extract_html` on the string held by `data`.
  ///
  /// # Errors
  ///
  /// Returns `ExtractorError::TypeMismatch` when `data` is not a string, plus whatever
  /// `extract_html` itself reports.
  fn extract(
    &self,
    data: &Value,
    config: &ExtractorConfig,
  ) -> Result<ExtractionResult, ExtractorError> {
    // Nothing in the trait forces callers to run `validate_input` first, so a non-string
    // value must surface as a recoverable error rather than hit `unreachable!()` and panic.
    match data.as_str() {
      Some(html) => extract_html(html, config),
      None => Err(html_type_mismatch(data)),
    }
  }
}
1 change: 1 addition & 0 deletions web-prover-core/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod errors;
mod predicate;

mod extractors;
pub(crate) mod format;
#[cfg(test)] mod test_fixtures;
#[cfg(test)] mod test_utils;

Expand Down

0 comments on commit 53070ce

Please sign in to comment.