From bd18f582fdbfaf93b08f0d8d47af304e3d6a2322 Mon Sep 17 00:00:00 2001 From: Arpad Borsos Date: Wed, 4 Sep 2024 13:03:38 +0200 Subject: [PATCH] get closer to the existing parser interface dealing with report builders --- core/benches/pyreport.rs | 28 +++--- core/src/error.rs | 5 + core/src/parsers/pyreport/chunks_serde.rs | 115 +++++++++++++++++++--- core/src/report/pyreport/types.rs | 17 ++++ 4 files changed, 140 insertions(+), 25 deletions(-) diff --git a/core/benches/pyreport.rs b/core/benches/pyreport.rs index 4e1fcb8..ed9b10a 100644 --- a/core/benches/pyreport.rs +++ b/core/benches/pyreport.rs @@ -1,4 +1,4 @@ -use std::{collections::HashMap, hint::black_box}; +use std::collections::HashMap; use codecov_rs::{ parsers::pyreport::{chunks, chunks_serde, report_json}, @@ -120,8 +120,13 @@ fn simple_chunks_serde() { b"{}\n<<<<< end_of_header >>>>>\n{}\n[1, null, [[0, 1]]]\n\n<<<<< end_of_chunk >>>>>\n{}\n[1, null, [[0, 1]]]\n[1, null, [[0, 1]]]\n", ]; + let report_json = report_json::ParsedReportJson { + files: Default::default(), + sessions: Default::default(), + }; + for input in chunks { - parse_chunks_file_serde(input) + parse_chunks_file_serde(input, &report_json); } } @@ -132,17 +137,18 @@ fn complex_chunks_serde(bencher: Bencher) { let chunks = load_fixture("pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-chunks.txt"); - bencher.bench(|| parse_chunks_file_serde(&chunks)); + // parsing the chunks depends on having loaded the `report_json` + let report = load_fixture( + "pyreport/large/worker-c71ddfd4cb1753c7a540e5248c2beaa079fc3341-report_json.json", + ); + let report_json = parse_report_json(&report); + + bencher.bench(|| parse_chunks_file_serde(&chunks, &report_json)); } -fn parse_chunks_file_serde(input: &[u8]) { - let chunks_file = chunks_serde::ChunksFile::new(input).unwrap(); - let mut chunks = chunks_file.chunks(); - while let Some(mut chunk) = chunks.next_chunk().unwrap() { - while let Some(line) = chunk.next_line().unwrap() { - black_box(line); - } - } +fn parse_chunks_file_serde(input: &[u8], report_json: &report_json::ParsedReportJson) { + let mut report_builder = TestReportBuilder::default(); + chunks_serde::parse_chunks_file(input, report_json, &mut report_builder).unwrap(); } #[track_caller] diff --git a/core/src/error.rs b/core/src/error.rs index 5f793bb..b3d0563 100644 --- a/core/src/error.rs +++ b/core/src/error.rs @@ -1,5 +1,7 @@ use thiserror::Error; +use crate::parsers::pyreport::chunks_serde::ChunksFileParseError; + pub type Result = std::result::Result; #[derive(Error, Debug)] @@ -26,4 +28,7 @@ pub enum CodecovError { #[cfg(feature = "pyreport")] #[error("failed to convert sqlite to pyreport: '{0}'")] PyreportConversionError(String), + + #[error(transparent)] + ChunksFileParseError(#[from] ChunksFileParseError), } diff --git a/core/src/parsers/pyreport/chunks_serde.rs b/core/src/parsers/pyreport/chunks_serde.rs index 121057b..9372e7b 100644 --- a/core/src/parsers/pyreport/chunks_serde.rs +++ b/core/src/parsers/pyreport/chunks_serde.rs @@ -37,10 +37,84 @@ use std::{collections::HashMap, fmt, mem, sync::OnceLock}; use memchr::{memchr, memmem}; use serde::{de, de::IgnoredAny, Deserialize}; -use crate::report::pyreport::{CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR}; +use super::report_json::ParsedReportJson; +use crate::{ + error::CodecovError, + report::{ + models, + pyreport::{ + types::{self, PyreportCoverage, ReportLine}, + CHUNKS_FILE_END_OF_CHUNK, CHUNKS_FILE_HEADER_TERMINATOR, + }, + Report, ReportBuilder, + }, +}; + +pub fn parse_chunks_file( + input: &[u8], + _report_json: &ParsedReportJson, + builder: &mut B, +) -> Result<(), CodecovError> +where + B: ReportBuilder, + R: Report, +{ + let chunks_file = ChunksFile::new(input)?; + + let mut labels_index = HashMap::with_capacity(chunks_file.labels_index().len()); + for (index, name) in chunks_file.labels_index() { + let context = builder.insert_context(name)?; + labels_index.insert(index.clone(), context.id); + } + + let mut report_lines = vec![]; + + let mut chunks = chunks_file.chunks(); + while let Some(mut chunk) = chunks.next_chunk()? { + let mut line_no = 0; + report_lines.clear(); + while let Some(line) = chunk.next_line()? { + line_no += 1; + if let Some(line) = line { + let coverage_type = match line.1.unwrap_or_default() { + CoverageType::Line => models::CoverageType::Line, + CoverageType::Branch => models::CoverageType::Branch, + CoverageType::Method => models::CoverageType::Method, + }; + let sessions = line + .2 + .into_iter() + .map(|session| types::LineSession { + session_id: session.0, + coverage: session.1.into(), + branches: None, // TODO + partials: None, // TODO + complexity: None, // TODO + }) + .collect(); + + let mut report_line = ReportLine { + line_no, + coverage: line.0.into(), + coverage_type, + sessions, + _messages: None, + _complexity: None, + datapoints: None, // TODO + }; + report_line.normalize(); + report_lines.push(report_line); + } + } + // TODO: + // utils::save_report_lines()?; + } + + Ok(()) +} #[derive(Debug, thiserror::Error)] -pub enum ParserError { +pub enum ChunksFileParseError { #[error("unexpected EOF")] UnexpectedEof, #[error("unexpected input")] @@ -53,12 +127,12 @@ pub enum ParserError { InvalidLineRecord(#[source] serde_json::Error), } -impl PartialEq for ParserError { +impl PartialEq for ChunksFileParseError { fn eq(&self, other: &Self) -> bool { core::mem::discriminant(self) == core::mem::discriminant(other) } } -impl Eq for ParserError {} +impl Eq for ChunksFileParseError {} #[derive(Debug)] pub struct ChunksFile<'d> { @@ -67,7 +141,7 @@ pub struct ChunksFile<'d> { } impl<'d> ChunksFile<'d> { - pub fn new(mut input: &'d [u8]) -> Result { + pub fn new(mut input: &'d [u8]) -> Result { static HEADER_FINDER: OnceLock = OnceLock::new(); let header_finder = HEADER_FINDER.get_or_init(|| memmem::Finder::new(CHUNKS_FILE_HEADER_TERMINATOR)); @@ -75,8 +149,8 @@ impl<'d> ChunksFile<'d> { let file_header = if let Some(pos) = header_finder.find(input) { let header_bytes = &input[..pos]; input = &input[pos + header_finder.needle().len()..]; - let file_header: FileHeader = - serde_json::from_slice(header_bytes).map_err(ParserError::InvalidFileHeader)?; + let file_header: FileHeader = serde_json::from_slice(header_bytes) + .map_err(ChunksFileParseError::InvalidFileHeader)?; file_header } else { FileHeader::default() @@ -99,7 +173,7 @@ pub struct Chunks<'d> { } impl<'d> Chunks<'d> { - pub fn next_chunk(&mut self) -> Result>, ParserError> { + pub fn next_chunk(&mut self) -> Result>, ChunksFileParseError> { if self.input.is_empty() { return Ok(None); } @@ -123,9 +197,10 @@ impl<'d> Chunks<'d> { })); } - let header_bytes = next_line(&mut chunk_bytes).ok_or(ParserError::UnexpectedInput)?; - let chunk_header: ChunkHeader = - serde_json::from_slice(header_bytes).map_err(ParserError::InvalidFileHeader)?; + let header_bytes = + next_line(&mut chunk_bytes).ok_or(ChunksFileParseError::UnexpectedInput)?; + let chunk_header: ChunkHeader = serde_json::from_slice(header_bytes) + .map_err(ChunksFileParseError::InvalidFileHeader)?; Ok(Some(Chunk { chunk_header, @@ -144,7 +219,7 @@ impl<'d> Chunk<'d> { &self.chunk_header.present_sessions } - pub fn next_line(&mut self) -> Result>, ParserError> { + pub fn next_line(&mut self) -> Result>, ChunksFileParseError> { let Some(line) = next_line(&mut self.input) else { return Ok(None); }; @@ -154,7 +229,7 @@ impl<'d> Chunk<'d> { } let line_record: LineRecord = - serde_json::from_slice(line).map_err(ParserError::InvalidLineRecord)?; + serde_json::from_slice(line).map_err(ChunksFileParseError::InvalidLineRecord)?; return Ok(Some(Some(line_record))); } } @@ -217,7 +292,7 @@ pub struct LineRecord( #[derive(Debug, Clone, PartialEq, Eq, Deserialize)] pub struct LineSession( /// session id - u32, + usize, /// coverage Coverage, /// TODO: branches @@ -260,6 +335,18 @@ pub enum Coverage { HitCount(u32), } +impl Into for Coverage { + fn into(self) -> PyreportCoverage { + match self { + Coverage::Partial => PyreportCoverage::Partial(), + Coverage::BranchTaken(covered, total) => { + PyreportCoverage::BranchesTaken { covered, total } + } + Coverage::HitCount(hits) => PyreportCoverage::HitCount(hits), + } + } +} + impl<'de> Deserialize<'de> for Coverage { fn deserialize(deserializer: D) -> Result where diff --git a/core/src/report/pyreport/types.rs b/core/src/report/pyreport/types.rs index 2c3b06e..4dba6d7 100644 --- a/core/src/report/pyreport/types.rs +++ b/core/src/report/pyreport/types.rs @@ -184,6 +184,23 @@ pub struct ReportLine { pub datapoints: Option>>, } +impl ReportLine { + pub fn normalize(&mut self) { + // Fix issues like recording branch coverage with `CoverageType::Method` + let (correct_coverage, correct_type) = + normalize_coverage_measurement(&self.coverage, &self.coverage_type); + self.coverage = correct_coverage; + self.coverage_type = correct_type; + + // Fix the `coverage` values in each `LineSession` as well + for line_session in &mut self.sessions { + let (correct_coverage, _) = + normalize_coverage_measurement(&line_session.coverage, &self.coverage_type); + line_session.coverage = correct_coverage; + } + } +} + /// Account for some quirks and malformed data. See code comments for details. pub(crate) fn normalize_coverage_measurement( coverage: &PyreportCoverage,