Skip to content

Commit 844c070

Browse files
Merge pull request #2 from pangenome/copy-aln-records
Add method to copy alignment records with trace data
2 parents 1256d44 + 8551db1 commit 844c070

File tree

3 files changed

+137
-49
lines changed

3 files changed

+137
-49
lines changed

Cargo.lock

Lines changed: 38 additions & 47 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ libc = "0.2"
1717
num_cpus = "1.16"
1818
nix = { version = "0.27", features = ["fs", "process"] }
1919
which = "6.0"
20-
onecode = { git = "https://github.com/pangenome/onecode-rs.git", rev = "2811715" }
20+
onecode = { git = "https://github.com/pangenome/onecode-rs.git", rev = "3ac229e39d72b0ac724c688e91fb2525e08922f7" }
2121

2222
[build-dependencies]
2323
cc = "1.0"

src/onelib.rs

Lines changed: 98 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ pub struct AlnRecord {
6868

6969
/// Reader for .1aln files
///
/// Wraps a `OneFile` handle together with an alignment count and a per-contig lookup table.
7070
pub struct AlnReader {
71-
file: OneFile,
71+
// Public in the new version of this hunk, so code outside this type can reach the
// underlying `OneFile` directly.
// NOTE(review): presumably exposed so it can be handed to a writer that copies raw
// records — confirm against the callers.
pub file: OneFile,
7272
// NOTE(review): presumably the number of alignment records in the file — confirm where
// it is populated.
num_alignments: i64,
7373
contig_offsets: std::collections::HashMap<i64, (i64, i64)>, // contig_id → (sbeg, clen)
7474
}
@@ -508,6 +508,103 @@ impl AlnWriter {
508508
Ok(())
509509
}
510510

511+
/// Copy an alignment record directly from input to output, preserving all data including trace points
512+
///
513+
/// This method copies the raw alignment record from the input file positioned at an 'A' record,
514+
/// including all associated data lines ('R', 'L', 'M', 'D', 'Q', 'T', 'X', etc.).
515+
/// This is the preferred method for filtering, as it preserves trace data that ALNtoPAF needs.
516+
///
517+
/// # Safety
518+
/// The input_file must be positioned at an 'A' record when this method is called.
519+
pub fn copy_alignment_record_from_file(&mut self, input_file: &mut OneFile) -> Result<()> {
520+
// Verify we're at an 'A' record
521+
if input_file.line_type() != 'A' {
522+
anyhow::bail!("Input file not positioned at 'A' record");
523+
}
524+
525+
// Copy 'A' record (alignment coordinates)
526+
for i in 0..6 {
527+
self.file.set_int(i, input_file.int(i));
528+
}
529+
self.file.write_line('A', 0, None);
530+
531+
// Copy all associated records until we hit 'T' (trace) or next 'A'
532+
loop {
533+
let next_type = input_file.read_line();
534+
535+
if next_type == '\0' {
536+
break; // EOF
537+
}
538+
539+
match next_type {
540+
'T' => {
541+
// Copy T record (trace point positions - INT_LIST)
542+
if let Some(t_values) = input_file.int_list() {
543+
let len = t_values.len() as i64;
544+
let ptr = t_values.as_ptr() as *mut std::ffi::c_void;
545+
self.file.write_line('T', len, Some(ptr));
546+
} else {
547+
// Empty T record
548+
self.file.write_line('T', 0, None);
549+
}
550+
551+
// Next should be X record (trace point diffs)
552+
let x_type = input_file.read_line();
553+
if x_type == 'X' {
554+
// Copy X record (INT_LIST of differences at each trace point)
555+
if let Some(x_values) = input_file.int_list() {
556+
let len = x_values.len() as i64;
557+
let ptr = x_values.as_ptr() as *mut std::ffi::c_void;
558+
self.file.write_line('X', len, Some(ptr));
559+
} else {
560+
// Empty X record
561+
self.file.write_line('X', 0, None);
562+
}
563+
}
564+
break; // Done with this alignment
565+
}
566+
'R' => {
567+
// Reverse complement flag
568+
self.file.write_line('R', 0, None);
569+
}
570+
'L' => {
571+
// Sequence lengths
572+
self.file.set_int(0, input_file.int(0));
573+
self.file.set_int(1, input_file.int(1));
574+
self.file.write_line('L', 0, None);
575+
}
576+
'M' => {
577+
// Matches
578+
self.file.set_int(0, input_file.int(0));
579+
self.file.write_line('M', 0, None);
580+
}
581+
'D' => {
582+
// Differences
583+
self.file.set_int(0, input_file.int(0));
584+
self.file.write_line('D', 0, None);
585+
}
586+
'Q' => {
587+
// Quality
588+
self.file.set_int(0, input_file.int(0));
589+
self.file.write_line('Q', 0, None);
590+
}
591+
'A' => {
592+
// Hit next alignment without seeing 'T' - this alignment had no trace data
593+
// Write empty trace records
594+
self.file.write_line('T', 0, None);
595+
self.file.write_line('X', 0, None);
596+
break;
597+
}
598+
_ => {
599+
// Skip unknown records
600+
continue;
601+
}
602+
}
603+
}
604+
605+
Ok(())
606+
}
607+
511608
/// Write an alignment to the file
512609
pub fn write_alignment(&mut self, aln: &Alignment) -> Result<()> {
513610
// Parse sequence IDs from names (they should be numeric contig IDs)

0 commit comments

Comments
 (0)