Skip to content

Commit 1771d0f

Browse files
committed
Merge remote-tracking branch 'origin/master' into deconstruct
2 parents 0bba109 + cbf330b commit 1771d0f

36 files changed

+1051
-256
lines changed

Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -762,9 +762,13 @@ $(LIB_DIR)/libtabixpp.a: $(LIB_DIR)/libhts.a $(TABIXPP_DIR)/*.cpp $(TABIXPP_DIR)
762762
# let CMake find Mac OpenMP. We need to use /usr first for CMake search or
763763
# Ubuntu 22.04 will decide pybind11 is installed in / when actually it is only
764764
# fully installed in /usr.
765+
# We also have to make sure to point at a Python that probably has its headers,
766+
# because there's no way to turn off vcflib's pybind11 build and pybind11 will
767+
# try and use the latest installed Python over the default one that probably
768+
# has headers.
765769
$(LIB_DIR)/libvcflib%a $(LIB_DIR)/libwfa2%a: $(LIB_DIR)/libhts.a $(LIB_DIR)/libtabixpp.a $(VCFLIB_DIR)/src/*.cpp $(VCFLIB_DIR)/src/*.hpp $(VCFLIB_DIR)/contrib/*/*.cpp $(VCFLIB_DIR)/contrib/*/*.h
766770
+rm -f $(VCFLIB_DIR)/contrib/WFA2-lib/VERSION
767-
+cd $(VCFLIB_DIR) && rm -Rf build && mkdir build && cd build && PKG_CONFIG_PATH="$(CWD)/$(LIB_DIR)/pkgconfig:$(PKG_CONFIG_PATH)" cmake -DCMAKE_C_COMPILER="$(CC)" -DCMAKE_CXX_COMPILER="$(CXX)" -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DZIG=OFF -DCMAKE_C_FLAGS="$(CFLAGS)" -DCMAKE_CXX_FLAGS="$(CXXFLAGS) ${CPPFLAGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=$(CWD) -DCMAKE_PREFIX_PATH="/usr;$(OMP_PREFIXES)" .. && cmake --build . --target vcflib vcf2tsv wfa2_static
771+
+cd $(VCFLIB_DIR) && rm -Rf build && mkdir build && cd build && PKG_CONFIG_PATH="$(CWD)/$(LIB_DIR)/pkgconfig:$(PKG_CONFIG_PATH)" cmake -DCMAKE_C_COMPILER="$(CC)" -DCMAKE_CXX_COMPILER="$(CXX)" -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DZIG=OFF -DCMAKE_C_FLAGS="$(CFLAGS)" -DCMAKE_CXX_FLAGS="$(CXXFLAGS) ${CPPFLAGS}" -DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_INSTALL_PREFIX=$(CWD) -DCMAKE_PREFIX_PATH="/usr;$(OMP_PREFIXES)" -DPYTHON_EXECUTABLE="$(shell which python3)" .. && cmake --build . --target vcflib vcf2tsv wfa2_static
768772
+cp $(VCFLIB_DIR)/contrib/filevercmp/*.h* $(INC_DIR)
769773
+cp $(VCFLIB_DIR)/contrib/fastahack/*.h* $(INC_DIR)
770774
+cp $(VCFLIB_DIR)/contrib/smithwaterman/*.h* $(INC_DIR)

deps/sdsl-lite

doc/wiki

Submodule wiki updated from 6aa40e4 to 3d3c56f

src/alignment.cpp

Lines changed: 130 additions & 110 deletions
Large diffs are not rendered by default.

src/alignment.hpp

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@ namespace vg {
2222

2323
const char* const BAM_DNA_LOOKUP = "=ACMGRSVTWYHKDBN";
2424

25-
// htslib-based alignment read functions
25+
// htslib-based alignment read functions.
26+
// When encountering read records that don't agree with the graph (i.e. go off
27+
// path ends, etc.), these stop the program and print a useful error message.
2628

2729
int hts_for_each(string& filename, function<void(Alignment&)> lambda);
2830
int hts_for_each_parallel(string& filename, function<void(Alignment&)> lambda);
@@ -99,11 +101,15 @@ string mapping_string(const string& source, const Mapping& mapping);
99101

100102
void cigar_mapping(const bam1_t *b, Mapping& mapping);
101103

104+
/// Convert a BAM record to an Alignment.
105+
/// May throw AlignmentEmbeddingError if the BAM record is inconsistent with
106+
/// the provided graph.
102107
Alignment bam_to_alignment(const bam1_t *b,
103108
const map<string, string>& rg_sample,
104109
const map<int, path_handle_t>& tid_path_handle,
105110
const bam_hdr_t *bh,
106111
const PathPositionHandleGraph* graph);
112+
/// Convert a BAM record to an Alignment without a graph.
107113
Alignment bam_to_alignment(const bam1_t *b, const map<string, string>& rg_sample, const map<int, path_handle_t>& tid_path_handle);
108114

109115
// the CIGAR string of the graph alignment
@@ -286,7 +292,11 @@ void reverse_complement_alignment_in_place(Alignment* aln, const function<int64_
286292
vector<Alignment> reverse_complement_alignments(const vector<Alignment>& alns, const function<int64_t(nid_t)>& node_length);
287293
int non_match_start(const Alignment& alignment);
288294
int non_match_end(const Alignment& alignment);
295+
/// Get the leading softclip from an Alignment, assuming it is coalesced into a
296+
/// single Edit
289297
int softclip_start(const Alignment& alignment);
298+
/// Get the trailing softclip from an Alignment, assuming it is coalesced into a
299+
/// single Edit
290300
int softclip_end(const Alignment& alignment);
291301
int softclip_trim(Alignment& alignment);
292302
int query_overlap(const Alignment& aln1, const Alignment& aln2);
@@ -400,15 +410,43 @@ struct AlignmentValidity {
400410
/// node lengths or ids. Result can be used like a bool or inspected for
401411
/// further details. Does not log anything itself about bad alignments.
402412
AlignmentValidity alignment_is_valid(const Alignment& aln, const HandleGraph* hgraph, bool check_sequence = false);
403-
413+
414+
/**
415+
* Represents a problem when trying to find a path region in a graph as an
416+
* Alignment, or when trying to inject a linear CIGAR-based alignment into the
417+
* graph along an embedded path.
418+
*
419+
* This could be a problem like the alignment trying to go out of range on the
420+
* embedded linear path/reference, or trying to go across the junction of a
421+
* path that isn't really circular.
422+
*
423+
* We expect the user to be able to cause this with bad inputs, so this
424+
* exception should be handled and reported in a helpful way, rather than as a
425+
* crash.
426+
*/
427+
class AlignmentEmbeddingError : public std::runtime_error {
428+
public:
429+
using std::runtime_error::runtime_error;
430+
};
431+
404432
/// Make an Alignment corresponding to a subregion of a stored path.
405433
/// Positions are 0-based, and pos2 is excluded.
406434
/// Respects path circularity, so pos2 < pos1 is not a problem.
407435
/// If pos1 == pos2, returns an empty alignment.
436+
///
437+
/// Throws AlignmentEmbeddingError if the region goes out of range, or tries to
438+
/// go across the junction of a non-circular path. Despite taking 0-based
439+
/// coordinates, error messages will describe 1-based coordinates.
408440
Alignment target_alignment(const PathPositionHandleGraph* graph, const path_handle_t& path, size_t pos1, size_t pos2,
409441
const string& feature, bool is_reverse);
410442
/// Same as above, but uses the given Mapping, translated directly form a CIGAR string, as a source of edits.
411443
/// The edits are inserted into the generated Alignment, cut as necessary to fit into the Alignment's Mappings.
444+
///
445+
/// Throws AlignmentEmbeddingError if the region goes out of range, or tries to
446+
/// go across the junction of a non-circular path, or if cigar_mapping
447+
/// describes edits that are impossible, like matches past the end of the
448+
/// described region. Despite taking 0-based coordinates, error messages will
449+
/// describe 1-based coordinates.
412450
Alignment target_alignment(const PathPositionHandleGraph* graph, const path_handle_t& path, size_t pos1, size_t pos2,
413451
const string& feature, bool is_reverse, Mapping& cigar_mapping);
414452

src/index_registry.cpp

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5203,6 +5203,10 @@ IndexingPlan IndexRegistry::make_plan(const IndexGroup& end_products) const {
52035203

52045204
// map dependency priority to requesters
52055205
map<size_t, set<size_t>, greater<size_t>> queue;
5206+
5207+
// To help the user, we track index identifiers that, if we had them,
5208+
// we would have been able to make a deeper plan.
5209+
set<IndexGroup> missing_index_sets;
52065210

52075211
auto num_recipes = [&](const IndexGroup& indexes) {
52085212
int64_t num = 0;
@@ -5371,6 +5375,9 @@ IndexingPlan IndexRegistry::make_plan(const IndexGroup& end_products) const {
53715375
#ifdef debug_index_registry
53725376
cerr << "index " << to_string(index_group) << " cannot be made from existing inputs, need to backtrack" << endl;
53735377
#endif
5378+
5379+
// Remember that if we had had this, we could have proceeded.
5380+
missing_index_sets.insert(index_group);
53745381

53755382
// prune to requester and advance to its next recipe, as many times as necessary until
53765383
// requester has remaining un-tried recipes
@@ -5380,7 +5387,7 @@ IndexingPlan IndexRegistry::make_plan(const IndexGroup& end_products) const {
53805387

53815388
if (get<1>(plan_path.back()).empty()) {
53825389
// this is the product of the plan path, and we're out of recipes for it
5383-
throw InsufficientInputException(product, *this);
5390+
throw InsufficientInputException(product, *this, missing_index_sets);
53845391
}
53855392

53865393
// remove items off the plan path until we get to the first index that requested
@@ -5745,16 +5752,34 @@ vector<pair<IndexName, vector<IndexName>>> AliasGraph::non_intermediate_aliases(
57455752
}
57465753

57475754
InsufficientInputException::InsufficientInputException(const IndexName& target,
5748-
const IndexRegistry& registry) noexcept :
5749-
runtime_error("Insufficient input to create " + target), target(target), inputs(registry.completed_indexes())
5755+
const IndexRegistry& registry,
5756+
const set<IndexGroup>& missing_index_sets) noexcept :
5757+
runtime_error("Insufficient input to create " + target), target(target), inputs(registry.completed_indexes()), missing_index_sets(missing_index_sets)
57505758
{
57515759
// nothing else to do
57525760
stringstream ss;
5753-
ss << "Inputs" << endl;
5754-
for (const auto& input : inputs) {
5755-
ss << "\t" << input << endl;
5761+
ss << "Inputs:" << endl;
5762+
if (inputs.empty()) {
5763+
ss << "\t<no inputs provided!>" << endl;
5764+
} else {
5765+
for (const auto& input : inputs) {
5766+
ss << "\t" << input << endl;
5767+
}
5768+
}
5769+
ss << "are insufficient to create target index " << target << "." << endl;
5770+
if (!missing_index_sets.empty()) {
5771+
ss << "Hint: more progress would be possible if you provided one of these:" << endl;
5772+
for (auto& index_set : missing_index_sets) {
5773+
ss << "\t";
5774+
for (auto name_iter = index_set.begin(); name_iter != index_set.end(); ++name_iter) {
5775+
if (name_iter != index_set.begin()) {
5776+
ss << ", ";
5777+
}
5778+
ss << *name_iter;
5779+
}
5780+
ss << endl;
5781+
}
57565782
}
5757-
ss << "are insufficient to create target index " << target << endl;
57585783
msg = ss.str();
57595784
}
57605785

src/index_registry.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -452,12 +452,14 @@ class InsufficientInputException : public runtime_error {
452452
public:
453453
InsufficientInputException() = delete;
454454
InsufficientInputException(const IndexName& target,
455-
const IndexRegistry& registry) noexcept;
455+
const IndexRegistry& registry,
456+
const set<IndexGroup>& missing_index_sets = {}) noexcept;
456457
const char* what() const noexcept;
457458
private:
458459
string msg;
459460
IndexName target;
460461
vector<IndexName> inputs;
462+
set<IndexGroup> missing_index_sets;
461463
};
462464

463465

src/minimizer_mapper.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,6 +432,10 @@ class MinimizerMapper : public AlignerClient {
432432
static constexpr int default_wfa_max_distance = WFAExtender::ErrorModel::default_distance().max;
433433
int wfa_max_distance = default_wfa_max_distance;
434434

435+
/// How much should candidate alignment scores be penalized for softclipped bases?
436+
static constexpr double default_softclip_penalty = 0.0;
437+
double softclip_penalty = default_softclip_penalty;
438+
435439
/// Should alignments be ranked by chain score instead of base-level score?
436440
static constexpr bool default_sort_by_chain_score = false;
437441
bool sort_by_chain_score = default_sort_by_chain_score;

0 commit comments

Comments
 (0)