Skip to content

Commit

Permalink
update for empty data
Browse files Browse the repository at this point in the history
  • Loading branch information
smehringer committed Dec 20, 2021
1 parent f3adc53 commit 498f128
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 70 deletions.
23 changes: 14 additions & 9 deletions include/bio/format/sam_input_handler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
void parse_field(vtag_t<field::qname> const & /**/, parsed_field_t & parsed_field)
{
std::string_view raw_field = get<field::qname>(raw_record);
if (raw_field != ".")
if (raw_field != "*")
parse_field_aux(raw_field, parsed_field); //default parsing
}

Expand All @@ -124,7 +124,9 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
void parse_field(vtag_t<field::rname> const & /**/, parsed_field_t & parsed_field)
{
std::string_view raw_field = get<field::rname>(raw_record);
parse_field_aux(raw_field, parsed_field);

if (raw_field != "*")
parse_field_aux(raw_field, parsed_field);

// todo insert into header
}
Expand All @@ -136,7 +138,7 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
{
std::string_view raw_field = get<field::cigar>(raw_record);

if (raw_field != ".")
if (raw_field != "*")
{
uint32_t cigar_count{};
char const * ptr = raw_field.data();
Expand All @@ -162,10 +164,13 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
{
std::string_view raw_field = get<field::rnext>(raw_record);

if (raw_field == "=")
raw_field = get<field::rname>(raw_record);
if (raw_field != "*")
{
if (raw_field == "=")
raw_field = get<field::rname>(raw_record);

parse_field_aux(raw_field, parsed_field);
parse_field_aux(raw_field, parsed_field);
}
}

/* PNEXT, TLEN are handled correctly by default */
Expand All @@ -176,7 +181,7 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
{
std::string_view raw_field = get<field::seq>(raw_record);

if (raw_field != ".")
if (raw_field != "*")
parse_field_aux(raw_field, parsed_field); // reading into e.g. dna4 vector
}

Expand All @@ -186,7 +191,7 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
{
std::string_view raw_field = get<field::qual>(raw_record);

if (raw_field != ".")
if (raw_field != "*")
parse_field_aux(raw_field, parsed_field); // reading into e.g. dna4 vector
}

Expand Down Expand Up @@ -214,7 +219,7 @@ class format_input_handler<sam> : public format_input_handler_base<format_input_
}

std::string header_string;
while (file_it != std::default_sentinel && file_it.peak() == '#')
while (file_it != std::default_sentinel && file_it.peak() == '@')
{
++file_it;
++line;
Expand Down
108 changes: 48 additions & 60 deletions test/unit/format/sam_file_format_test_template.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,20 +150,11 @@ struct sam_file_read : public sam_file_data

TYPED_TEST_SUITE_P(sam_file_read);

// ----------------------------------------------------------------------------
// general
// ----------------------------------------------------------------------------

// TYPED_TEST_P(sam_file_read, input_concept)
// {
// EXPECT_TRUE((seqan3::sam_file_input_format<TypeParam>));
// }

// ----------------------------------------------------------------------------
// sam_file_read
// ----------------------------------------------------------------------------

TYPED_TEST_P(sam_file_read, read_in_all_data)
TYPED_TEST_P(sam_file_read, full_data_set)
{
// prepare tag dictionary
this->tag_dicts[0]["NM"_tag] = -7;
Expand Down Expand Up @@ -210,36 +201,54 @@ TYPED_TEST_P(sam_file_read, read_in_all_data)
}
}

// TYPED_TEST_P(sam_file_read, read_in_all_but_empty_data)
// {
// typename TestFixture::stream_type istream{this->empty_input};
// seqan3::sam_file_input fin{istream, this->ref_ids, this->ref_sequences, TypeParam{}};

// EXPECT_TRUE((*fin.begin()).sequence().empty());
// EXPECT_TRUE((*fin.begin()).id().empty());
// EXPECT_TRUE((*fin.begin()).base_qualities().empty());
// EXPECT_EQ((*fin.begin()).sequence_position(), 0);
// EXPECT_TRUE(!(*fin.begin()).reference_id().has_value());
// EXPECT_TRUE(!(*fin.begin()).reference_position().has_value());
// EXPECT_TRUE(std::ranges::empty(std::get<0>((*fin.begin()).alignment())));
// EXPECT_TRUE(std::ranges::empty(std::get<1>((*fin.begin()).alignment())));
// EXPECT_EQ((*fin.begin()).flag(), seqan3::sam_flag{0u});
// EXPECT_EQ((*fin.begin()).mapping_quality(), 0u);
// EXPECT_TRUE(!(*fin.begin()).mate_reference_id().has_value());
// EXPECT_TRUE(!(*fin.begin()).mate_position().has_value());
// EXPECT_EQ((*fin.begin()).template_length(), int32_t{});
// EXPECT_TRUE((*fin.begin()).tags().empty());
// }
TYPED_TEST_P(sam_file_read, all_missing_data)
{
typename TestFixture::stream_type istream{this->empty_input};

// TYPED_TEST_P(sam_file_read, read_in_almost_nothing)
// {
// typename TestFixture::stream_type istream{this->simple_three_reads_input};
// seqan3::sam_file_input fin{istream, TypeParam{}, seqan3::fields<seqan3::field::mapq>{}};
using record_t =
bio::detail::record_from_typelist<std::remove_cvref_t<decltype(bio::map_io::default_field_ids)>,
std::remove_cvref_t<decltype(bio::map_io::field_types_sam<>)>>;
bio::map_io::reader_options default_options{};
bio::format_input_handler<bio::sam> input_handler{istream, default_options};
record_t rec;

// size_t i{0};
// for (auto & [mapq] : fin)
// EXPECT_EQ(mapq, this->mapqs[i++]);
// }
input_handler.parse_next_record_into(rec);

EXPECT_TRUE(rec.id().empty());
EXPECT_TRUE(rec.rname().empty());
EXPECT_TRUE(rec.rnext().empty());
EXPECT_TRUE(rec.cigar().empty());
EXPECT_TRUE(rec.seq().empty());
EXPECT_TRUE(rec.qual().empty());
// EXPECT_TRUE(rec.tags().empty()) << tags;

EXPECT_EQ(rec.flag(), bio::map_io::sam_flag{0u});
EXPECT_EQ(rec.pos(), 0);
EXPECT_EQ(rec.pnext(), 0);
EXPECT_EQ(rec.mapq(), 0u);
EXPECT_EQ(rec.tlen(), 0);
}

TYPED_TEST_P(sam_file_read, select_fields)
{
typename TestFixture::stream_type istream{this->empty_input};

constexpr auto fid = bio::vtag<bio::field::rname, bio::field::pos>;
constexpr auto ftype = bio::ttag<std::string_view, int64_t>;

using record_t =
bio::detail::record_from_typelist<std::remove_cvref_t<decltype(fid)>, std::remove_cvref_t<decltype(ftype)>>;
bio::map_io::reader_options default_options{};
bio::format_input_handler<bio::sam> input_handler{istream, default_options};
record_t rec;

for (unsigned i = 0; i < 3; ++i)
{
input_handler.parse_next_record_into(rec);
EXPECT_EQ(rec.rname(), this->ref_id);
EXPECT_EQ(rec.pos(), this->positions[i]);
}
}

// TYPED_TEST_P(sam_file_read, read_in_alignment_only_with_ref)
// {
Expand Down Expand Up @@ -327,27 +336,6 @@ TYPED_TEST_P(sam_file_read, read_in_all_data)
// }
// }

// TYPED_TEST_P(sam_file_read, cigar_vector)
// {
// std::vector<std::vector<seqan3::cigar>> expected
// {
// {{1, 'S'_cigar_operation}, {1, 'M'_cigar_operation}, {1, 'D'_cigar_operation}, {1, 'M'_cigar_operation},
// {1, 'I'_cigar_operation}},
// {{1, 'H'_cigar_operation}, {7, 'M'_cigar_operation}, {1, 'D'_cigar_operation}, {1, 'M'_cigar_operation},
// {1, 'S'_cigar_operation}, {2, 'H'_cigar_operation}},
// {{1, 'S'_cigar_operation}, {1, 'M'_cigar_operation}, {1, 'P'_cigar_operation}, {1, 'M'_cigar_operation},
// {1, 'I'_cigar_operation}, {1, 'M'_cigar_operation}, {1, 'I'_cigar_operation}, {1, 'D'_cigar_operation},
// {1, 'M'_cigar_operation}, {1, 'S'_cigar_operation}}
// };

// typename TestFixture::stream_type istream{this->simple_three_reads_input};
// seqan3::sam_file_input fin{istream, TypeParam{}, seqan3::fields<seqan3::field::cigar>{}};

// size_t i{0};
// for (auto & [cigar_v] : fin)
// EXPECT_EQ(cigar_v, expected[i++]);
// }

// TYPED_TEST_P(sam_file_read, format_error_ref_id_not_in_reference_information)
// {
// { // with reference information given
Expand Down Expand Up @@ -672,7 +660,7 @@ TYPED_TEST_P(sam_file_read, read_in_all_data)
// seqan3::format_error);
// }

REGISTER_TYPED_TEST_SUITE_P(sam_file_read, read_in_all_data);
REGISTER_TYPED_TEST_SUITE_P(sam_file_read, full_data_set, all_missing_data, select_fields);

// REGISTER_TYPED_TEST_SUITE_P(sam_file_read,
// input_concept,
Expand Down
2 changes: 1 addition & 1 deletion test/unit/format/sam_input_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ read3 43 ref 3 63 1S1M1P1M1I1M1I1D1M1S ref 10 300 GGAGTATA !!*+,-./
"\tbH:H:1AE301\n"
"read3\t43\tref\t3\t63\t1S1M1P1M1I1M1I1D1M1S\tref\t10\t300\tGGAGTATA\t!!*+,-./\n"};

std::string empty_input{"@HD\tVN:1.6\n@SQ\tSN:ref\tLN:34\n*\t0\t*\t0\t0\t*\t*\t0\t0\t*\t*\n"};
std::string empty_input{"*\t0\t*\t0\t0\t*\t*\t0\t0\t*\t*\n"};

std::string empty_cigar{"read1\t41\tref\t1\t61\t*\tref\t10\t300\tACGT\t!##$\n"};

Expand Down

0 comments on commit 498f128

Please sign in to comment.