Skip to content

Commit

Permalink
Merge pull request seqan#3248 from eseiler/misc/refine_tags
Browse files Browse the repository at this point in the history
[MISC] Refine unknown tag handling (HD/PG)
  • Loading branch information
eseiler authored May 31, 2024
2 parents 41a17ad + b638661 commit 21c7d04
Show file tree
Hide file tree
Showing 7 changed files with 94 additions and 38 deletions.
19 changes: 10 additions & 9 deletions include/seqan3/io/sam_file/detail/format_sam_base.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,13 +341,18 @@ inline void format_sam_base::read_header(stream_view_type && stream_view,
read_forward_range_field(string_buffer, value);
};

auto print_cerr_of_unspported_tag = [&options](char const * const header_tag, std::array<char, 2> raw_tag)
auto consume_unsupported_tag_and_print_warning =
[&](char const * const header_tag, std::array<char, 2> const raw_tag)
{
// Not using `copy_next_tag_value_into_buffer` because we do not care whether the tag is valid.
// E.g., `pb5.0.0` instead of `pb:5.0.0`, would break the parsing if we used `copy_next_tag_value_into_buffer`.
take_until_predicate(is_char<'\t'> || is_char<'\n'>);

if (options.stream_warnings_to == nullptr)
return;

*options.stream_warnings_to << "Unsupported SAM header tag in @" << header_tag << ": " << raw_tag[0]
<< raw_tag[1] << '\n';
*options.stream_warnings_to << "Unsupported tag found in SAM header @" << header_tag << ": \"" << raw_tag[0]
<< raw_tag[1] << string_buffer << "\"\n";
};

while (it != end && is_char<'@'>(*it))
Expand Down Expand Up @@ -388,7 +393,7 @@ inline void format_sam_base::read_header(stream_view_type && stream_view,
}
default: // unsupported header tag
{
print_cerr_of_unspported_tag("HD", raw_tag);
consume_unsupported_tag_and_print_warning("HD", raw_tag);
}
}

Expand All @@ -397,8 +402,6 @@ inline void format_sam_base::read_header(stream_view_type && stream_view,
copy_next_tag_value_into_buffer();
read_forward_range_field(string_buffer, *header_entry);
}
else
skip_until_predicate(is_char<'\t'> || is_char<'\n'>);
}
++it; // skip newline

Expand Down Expand Up @@ -562,7 +565,7 @@ inline void format_sam_base::read_header(stream_view_type && stream_view,
}
default: // unsupported header tag
{
print_cerr_of_unspported_tag("PG", raw_tag);
consume_unsupported_tag_and_print_warning("PG", raw_tag);
}
}

Expand All @@ -571,8 +574,6 @@ inline void format_sam_base::read_header(stream_view_type && stream_view,
copy_next_tag_value_into_buffer();
read_forward_range_field(string_buffer, *program_info_entry);
}
else
skip_until_predicate(is_char<'\t'> || is_char<'\n'>);
}
++it; // skip newline

Expand Down
45 changes: 37 additions & 8 deletions test/snippet/io/sam_file/sam_file_input_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ struct temporary_file
}
};

static constexpr auto sam_file_raw = R"(@HD VN:1.6 pb:5.0.0
static constexpr auto sam_file_raw = R"(@HD VN:1.6 pb:5.0.0 ot:ter
@SQ SN:ref LN:34
)";

Expand All @@ -43,16 +43,14 @@ static auto get_sam_file_input()
void defaults_to_cerr()
{
auto fin = get_sam_file_input();
std::cerr << "Written to cerr: ";
auto it = fin.begin(); // Prints to cerr: "Unsupported SAM header tag in @HD: pb"
auto it = fin.begin();
}

void redirect_to_cout()
{
auto fin = get_sam_file_input();
fin.options.stream_warnings_to = std::addressof(std::cout); // Equivalent to `= &std::cout;`
std::cout << "Written to cout: ";
auto it = fin.begin(); // Prints to cout: "Unsupported SAM header tag in @HD: pb"
auto it = fin.begin();
}

void redirect_to_file()
Expand All @@ -63,23 +61,54 @@ void redirect_to_file()
{ // Inner scope to close file before reading
std::ofstream warning_file{tmp_file.path};
fin.options.stream_warnings_to = std::addressof(warning_file); // Equivalent to `= &warning_file;`
auto it = fin.begin(); // Prints to file: "Unsupported SAM header tag in @HD: pb"
auto it = fin.begin();
}

std::cout << "Written to file: " << tmp_file.read_content();
std::cout << "File content:\n" << tmp_file.read_content();
}

void silence_warnings()
{
auto fin = get_sam_file_input();
fin.options.stream_warnings_to = nullptr;
auto it = fin.begin(); // No warning emitted
auto it = fin.begin();
}

// Example: collect the header warnings in memory instead of printing them directly,
// then forward only the warnings that do not mention the "pb" tag to cerr.
void filter()
{
    auto fin = get_sam_file_input();
    std::stringstream captured_warnings{};
    fin.options.stream_warnings_to = std::addressof(captured_warnings); // Equivalent to `= &captured_warnings;`
    auto it = fin.begin(); // Warnings end up in `captured_warnings` rather than on cerr.

    std::string warning{};
    while (std::getline(captured_warnings, warning))
    {
        // Warnings about "pb" are expected and therefore dropped; everything else is reported.
        bool const mentions_pb = warning.find("pb") != std::string::npos; // C++23: `warning.contains("pb")`
        if (mentions_pb)
            continue;

        std::cerr << warning << '\n';
    }
}

// Writes the banner "### <section> ###" to both cout and cerr so that the captured
// standard output and standard error of the snippet can be matched up section by section.
void print_section(std::string_view const section)
{
    auto const write_banner = [section](std::ostream & target)
    {
        target << "### " << section << " ###\n";
    };

    write_banner(std::cout);
    write_banner(std::cerr);
}

int main()
{
print_section("defaults_to_cerr");
defaults_to_cerr();

print_section("redirect_to_cout");
redirect_to_cout();

print_section("redirect_to_file");
redirect_to_file();

print_section("silence_warnings");
silence_warnings();

print_section("filter");
filter();
}
9 changes: 8 additions & 1 deletion test/snippet/io/sam_file/sam_file_input_options.err
Original file line number Diff line number Diff line change
@@ -1 +1,8 @@
Written to cerr: Unsupported SAM header tag in @HD: pb
### defaults_to_cerr ###
Unsupported tag found in SAM header @HD: "pb:5.0.0"
Unsupported tag found in SAM header @HD: "ot:ter"
### redirect_to_cout ###
### redirect_to_file ###
### silence_warnings ###
### filter ###
Unsupported tag found in SAM header @HD: "ot:ter"
12 changes: 10 additions & 2 deletions test/snippet/io/sam_file/sam_file_input_options.out
Original file line number Diff line number Diff line change
@@ -1,2 +1,10 @@
Written to cout: Unsupported SAM header tag in @HD: pb
Written to file: Unsupported SAM header tag in @HD: pb
### defaults_to_cerr ###
### redirect_to_cout ###
Unsupported tag found in SAM header @HD: "pb:5.0.0"
Unsupported tag found in SAM header @HD: "ot:ter"
### redirect_to_file ###
File content:
Unsupported tag found in SAM header @HD: "pb:5.0.0"
Unsupported tag found in SAM header @HD: "ot:ter"
### silence_warnings ###
### filter ###
11 changes: 7 additions & 4 deletions test/unit/io/sam_file/format_bam_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,14 @@ struct sam_file_read<seqan3::format_bam> : public sam_file_data
'\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'};

std::string unknown_tag_header{
'\x42', '\x41', '\x4d', '\x01', '\x25', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56',
'\x42', '\x41', '\x4d', '\x01', '\x4b', '\x00', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56',
'\x4e', '\x3a', '\x31', '\x2e', '\x36', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e',
'\x30', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53', '\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09',
'\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00',
'\x00', '\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'};
'\x30', '\x09', '\x6f', '\x74', '\x74', '\x65', '\x72', '\x0a', '\x40', '\x53', '\x51', '\x09', '\x53',
'\x4e', '\x3a', '\x72', '\x65', '\x66', '\x09', '\x4c', '\x4e', '\x3a', '\x33', '\x34', '\x0a', '\x40',
'\x50', '\x47', '\x09', '\x49', '\x44', '\x3a', '\x6e', '\x6f', '\x76', '\x6f', '\x61', '\x6c', '\x69',
'\x67', '\x6e', '\x09', '\x70', '\x62', '\x3a', '\x35', '\x2e', '\x30', '\x2e', '\x30', '\x09', '\x6f',
'\x74', '\x74', '\x65', '\x72', '\x0a', '\x01', '\x00', '\x00', '\x00', '\x04', '\x00', '\x00', '\x00',
'\x72', '\x65', '\x66', '\x00', '\x22', '\x00', '\x00', '\x00'};

std::string big_header_input{
'\x42', '\x41', '\x4D', '\x01', '\xB7', '\x01', '\x00', '\x00', '\x40', '\x48', '\x44', '\x09', '\x56', '\x4E',
Expand Down
26 changes: 15 additions & 11 deletions test/unit/io/sam_file/format_sam_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,12 @@ struct sam_file_read<seqan3::format_sam> : public sam_file_data
@SQ SN:ref LN:34
)"};

// "otter" is not valid because a user-defined/local tag must have the format [TAG]:[VALUE].
// However, encountering such a tag should not break the parsing.
std::string unknown_tag_header{
R"(@HD VN:1.6 pb:5.0.0
R"(@HD VN:1.6 pb:5.0.0 otter
@SQ SN:ref LN:34
@PG ID:novoalign pb:5.0.0 otter
)"};

std::string big_header_input{
Expand Down Expand Up @@ -176,16 +179,17 @@ TEST_F(sam_format, header_errors)
"@SQ\tSN:ref2\tLN:243199373\tSB:user_tag\tLB:user_tag\tpb:user_tag\n"
"@RG\tID:U0a_A2_L1\tIB:user_tag\tpb:user_tag\n"
"@PG\tID:qc\tIB:user_tag\tPB:user_tag\tCB:user_tag\tDB:user_tag\tVB:user_tag\tpb:user_tag\n"};
std::string expected_cerr{"Unsupported SAM header tag in @HD: VB\n"
"Unsupported SAM header tag in @HD: SB\n"
"Unsupported SAM header tag in @HD: GB\n"
"Unsupported SAM header tag in @HD: pb\n"
"Unsupported SAM header tag in @PG: IB\n"
"Unsupported SAM header tag in @PG: PB\n"
"Unsupported SAM header tag in @PG: CB\n"
"Unsupported SAM header tag in @PG: DB\n"
"Unsupported SAM header tag in @PG: VB\n"
"Unsupported SAM header tag in @PG: pb\n"};
std::string expected_cerr{"Unsupported tag found in SAM header @HD: \"VB:user_tag\"\n"
"Unsupported tag found in SAM header @HD: \"SB:user_tag\"\n"
"Unsupported tag found in SAM header @HD: \"GB:user_tag\"\n"
"Unsupported tag found in SAM header @HD: \"pb:user_tag\"\n"
"Unsupported tag found in SAM header @PG: \"IB:user_tag\"\n"
"Unsupported tag found in SAM header @PG: \"PB:user_tag\"\n"
"Unsupported tag found in SAM header @PG: \"CB:user_tag\"\n"
"Unsupported tag found in SAM header @PG: \"DB:user_tag\"\n"
"Unsupported tag found in SAM header @PG: \"VB:user_tag\"\n"
"Unsupported tag found in SAM header @PG: \"pb:user_tag\"\n"};

std::istringstream istream(header_str);
seqan3::sam_file_input fin{istream, seqan3::format_sam{}};

Expand Down
10 changes: 7 additions & 3 deletions test/unit/io/sam_file/sam_file_format_test_template.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,10 @@ TYPED_TEST_P(sam_file_read, issue2423)

TYPED_TEST_P(sam_file_read, unknown_header_tag)
{
constexpr std::string_view expected_warning = "Unsupported tag found in SAM header @HD: \"pb:5.0.0\"\n"
"Unsupported tag found in SAM header @HD: \"otter\"\n"
"Unsupported tag found in SAM header @PG: \"pb:5.0.0\"\n"
"Unsupported tag found in SAM header @PG: \"otter\"\n";
// Default: Warnings to cerr
{
typename TestFixture::stream_type istream{this->unknown_tag_header};
Expand All @@ -367,7 +371,7 @@ TYPED_TEST_P(sam_file_read, unknown_header_tag)
testing::internal::CaptureStderr();
EXPECT_NO_THROW(fin.begin());
EXPECT_EQ(testing::internal::GetCapturedStdout(), "");
EXPECT_EQ(testing::internal::GetCapturedStderr(), "Unsupported SAM header tag in @HD: pb\n");
EXPECT_EQ(testing::internal::GetCapturedStderr(), expected_warning);
}
// Redirect to cout
{
Expand All @@ -377,7 +381,7 @@ TYPED_TEST_P(sam_file_read, unknown_header_tag)
testing::internal::CaptureStdout();
testing::internal::CaptureStderr();
EXPECT_NO_THROW(fin.begin());
EXPECT_EQ(testing::internal::GetCapturedStdout(), "Unsupported SAM header tag in @HD: pb\n");
EXPECT_EQ(testing::internal::GetCapturedStdout(), expected_warning);
EXPECT_EQ(testing::internal::GetCapturedStderr(), "");
}
// Redirect to file
Expand All @@ -403,7 +407,7 @@ TYPED_TEST_P(sam_file_read, unknown_header_tag)
std::ifstream warning_file{filename};
ASSERT_TRUE(warning_file.good());
std::string content{std::istreambuf_iterator<char>(warning_file), std::istreambuf_iterator<char>()};
EXPECT_EQ(content, "Unsupported SAM header tag in @HD: pb\n");
EXPECT_EQ(content, expected_warning);
}
// Silence
{
Expand Down

0 comments on commit 21c7d04

Please sign in to comment.