Skip to content

Commit

Permalink
Merge pull request #23 from h-2/bcf_output2
Browse files Browse the repository at this point in the history
Bcf output
  • Loading branch information
h-2 authored Feb 16, 2022
2 parents c00abe2 + 3fe704c commit 1e3d708
Show file tree
Hide file tree
Showing 8 changed files with 1,595 additions and 132 deletions.
1,270 changes: 1,270 additions & 0 deletions include/bio/format/bcf_output_handler.hpp

Large diffs are not rendered by default.

64 changes: 39 additions & 25 deletions include/bio/stream/detail/fast_streambuf_iterator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <span>

#include <bio/detail/charconv.hpp>
#include <bio/detail/concept.hpp>

namespace bio::detail
{
Expand Down Expand Up @@ -253,7 +254,7 @@ class fast_ostreambuf_iterator
// TODO: evaluate whether this is actually faster than just calling: stream_buf->sputc(c);
if (stream_buf->pptr() == stream_buf->epptr())
{
if (stream_buf->sputc(c) == traits_t::eof()) // overflow() [virtual], then write character
if (stream_buf->sputc(c) == traits_t::eof()) // sputc() [virtual], then write character
{
// LCOV_EXCL_START
throw std::ios_base::failure{"Cannot write to output stream (reached traits::eof() condition)."};
Expand All @@ -273,9 +274,9 @@ class fast_ostreambuf_iterator
bool failed() const noexcept
{
    // Invoke overflow() without a character argument and report whether it signalled traits_t::eof().
    auto const overflow_result = stream_buf->overflow();
    return overflow_result == traits_t::eof();
}

/*!\brief Writes a range to the associated output.
* \tparam range_type The type of range to write; Must model std::ranges::forward_range.
* \tparam rng_t The type of range to write; Must model std::ranges::forward_range.
* \param[in] rng The range to write.
* \returns If `range_type` models `std::ranges::borrowed_range` returns an iterator pointing to end of the range
* \returns If `rng_t` models `std::ranges::borrowed_range` returns an iterator pointing to end of the range
* (rng) else returns `void`.
*
* This function avoids the buffer-at-end check by writing the range in chunks, where a chunk has the size of
Expand All @@ -290,26 +291,29 @@ class fast_ostreambuf_iterator
*
* \include test/snippet/io/detail/iterator_write_range.cpp
*/
template <std::ranges::forward_range range_type>
//!\cond
requires std::ranges::borrowed_range<range_type>
//!\endcond
auto write_range(range_type && rng)
template <std::ranges::forward_range rng_t>
auto write_range(rng_t && rng)
{
using sen_t = std::ranges::sentinel_t<range_type>;
using it_t = std::ranges::iterator_t<range_type>;
using sen_t = std::ranges::sentinel_t<rng_t>;
using it_t = std::ranges::iterator_t<rng_t>;

it_t it = std::ranges::begin(rng);
sen_t end = std::ranges::end(rng);

if (stream_buf->epptr() - stream_buf->pptr() == 0 && it != end)
{
stream_buf->sputc(*it);
++it;
}

while (it != end)
{
size_t const buffer_space = stream_buf->epptr() - stream_buf->pptr();
assert(buffer_space > 0);

if constexpr (std::ranges::sized_range<range_type>)
if constexpr (std::sized_sentinel_for<sen_t, it_t>)
{
size_t const characters_to_write = std::min<size_t>(std::ranges::distance(it, end), buffer_space);
// TODO if input range is contiguous over char, use sputn/xsputn instead
size_t const characters_to_write = std::min<size_t>(end - it, buffer_space);
auto copy_res = std::ranges::copy_n(it, characters_to_write, stream_buf->pptr());
it = copy_res.in;
stream_buf->pbump(characters_to_write);
Expand All @@ -323,32 +327,42 @@ class fast_ostreambuf_iterator
}

if (it == end) // no more characters to write
return it;
break;

// Push one more character and flush
if (stream_buf->overflow(*it) == traits_t::eof())
if (stream_buf->sputc(*it) == traits_t::eof())
{
// LCOV_EXCL_START
throw std::ios_base::failure{"Cannot write to output stream (reached traits::eof() condition)."};
// LCOV_EXCL_STOP
}

++it; // drop 1 character that has been written in overflow()
++it; // drop 1 character that has been written in sputc() above
}

return it;
if constexpr (std::ranges::borrowed_range<rng_t>)
return it;
else
return;
}

//!\cond
// overload for non-std::ranges::borrowed_range types that return void
template <std::ranges::forward_range range_type>
void write_range(range_type && rng)
//!\overload
template <std::ranges::contiguous_range rng_t>
//!\cond
requires(std::ranges::sized_range<rng_t> && std::same_as<std::ranges::range_value_t<rng_t> const, char const>)
//!\endcond
auto write_range(rng_t && rng)
{
write_range(rng); // lvalue is always a safe range. return value is ignored because iterator would be dangling
stream_buf->sputn(std::ranges::data(rng), std::ranges::size(rng));

if constexpr (std::ranges::borrowed_range<rng_t>)
return std::ranges::begin(rng) + std::ranges::size(rng);
else
return;
}

//!\overload
void write_range(char const * const cstring)
{
    // Wrap the C string in a std::string_view and forward it to the range overloads of write_range().
    write_range(std::string_view{cstring});
}
//!\endcond

/*!\brief Writes a number to the underlying stream buffer using std::to_chars.
* \param[in] num The number to write.
Expand All @@ -367,7 +381,7 @@ class fast_ostreambuf_iterator
}
}

//!\brief Write the binary representation of a type byte-wise to the output stream.
//!\brief Write the binary representation of an object byte-wise to the output stream.
template <typename t>
requires(std::is_trivially_copyable_v<t> && !std::ranges::range<t>)
void write_as_binary(t const & num)
Expand All @@ -377,7 +391,7 @@ class fast_ostreambuf_iterator
write_range(v);
}

//!\brief Write a contiguous range byte-wise to the output stream.
//!\brief Write the binary representation of a contiguous range byte-wise to the output stream.
template <std::ranges::contiguous_range rng_t>
requires std::ranges::sized_range<rng_t>
void write_as_binary(rng_t && rng)
Expand Down
1 change: 1 addition & 0 deletions include/bio/var_io/writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <iosfwd>

#include <bio/detail/writer_base.hpp>
#include <bio/format/bcf_output_handler.hpp>
#include <bio/format/vcf_output_handler.hpp>
#include <bio/var_io/header.hpp>
#include <bio/var_io/writer_options.hpp>
Expand Down
5 changes: 3 additions & 2 deletions include/bio/var_io/writer_options.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

#pragma once

#include <bio/format/bcf.hpp>
#include <bio/format/vcf.hpp>
#include <bio/stream/transparent_ostream.hpp>
#include <bio/var_io/misc.hpp>
Expand Down Expand Up @@ -82,7 +83,7 @@ namespace bio::var_io
*
* TODO describe how to easily initialise this
*/
template <typename formats_t = seqan3::type_list<vcf>>
template <typename formats_t = seqan3::type_list<bcf, vcf>>
struct writer_options
{
/*!\brief Try to use types smaller than 32bit to represent integers.
Expand All @@ -102,7 +103,7 @@ struct writer_options
*
* See bio::var_io::writer for an overview of the supported formats.
*/
formats_t formats = ttag<vcf>;
formats_t formats = ttag<bcf, vcf>;

//!\brief Options that are passed on to the internal stream object.
transparent_ostream_options stream_options{};
Expand Down
1 change: 1 addition & 0 deletions test/unit/format/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
bio_test(bcf_input_test.cpp)
bio_test(bcf_output_test.cpp)
bio_test(fasta_input_test.cpp)
bio_test(vcf_input_test.cpp)
bio_test(vcf_output_test.cpp)
Loading

0 comments on commit 1e3d708

Please sign in to comment.