-
Notifications
You must be signed in to change notification settings - Fork 244
Rapid HTS Application Development using javascript and HTSJDK
Pierre Lindenbaum edited this page Nov 10, 2017
·
3 revisions
Since Java SE 8, the Java JDK is shipped with a java-based javascript engine called Oracle Nashorn.
Using the Nashorn engine 'jjs' ( https://docs.oracle.com/javase/8/docs/technotes/tools/unix/jjs.html ) command-line tool and the htsjdk library it's possible to quickly develop a scripted HTS application using the htsjdk classes.
The following javascript program converts a VCF file to a BED file.
if(arguments.length!=1)
{
java.lang.System.err.println("arguments required : one VCF File ");
java.lang.System.exit(-1);
}
//get stdout
var out = java.lang.System.out;
// retrieve java class for java.io.File
var File = Java.type("java.io.File");
// retrieve java class for VCFFileReader
var VCFFileReader = Java.type("htsjdk.variant.vcf.VCFFileReader");
// create a new File object
var vcfFile = new File(arguments[0]);
// open the VCF reader
var vcfFileReader = new VCFFileReader(vcfFile,false);
// get an iterator over the variants
var iter = vcfFileReader.iterator();
// print each variant as a BED file
while(iter.hasNext()) {
var vc = iter.next();
out.print(vc.getContig());
out.print("\t");
out.print( new java.lang.Integer(vc.getStart()-1));/* prevent floating number */
out.print("\t");
out.print(vc.getEnd());
out.println();
}
//cleanup
iter.close();
vcfFileReader.close();
Invocation:
jjs -cp `find htsjdk/ -name "*.jar" |tr "\n" ":"` script.js -- input.vcf
The following javascript program count the number of records in a set of BAM files:
var out=java.lang.System.out;
var File = Java.type("java.io.File");
var SamReaderFactory = Java.type("htsjdk.samtools.SamReaderFactory");
for(var idx in arguments)
{
var n=0;
var samReader = SamReaderFactory.makeDefault().open(new File(arguments[idx]));
var iter=samReader.iterator();
while(iter.hasNext()) {
var rec=iter.next();
n++;
}
samReader.close();
out.println(arguments[idx]+"\t"+n);
}
invocation:
jjs -cp `find dist build/libs -name "*.jar" | tr "\n" ":"` script.js -- in1.bam in2.sam
java -jar /path/to/picard.jar SortSam I=in.bam O=/dev/stdout SO=queryname |\
jjs -cp /path/to/picard.jar script.js |\
samtools sort -T TMP -o out.bam -
with script.js :
var out=java.lang.System.out;
var File = Java.type("java.io.File");
var SamReaderFactory = Java.type("htsjdk.samtools.SamReaderFactory");
var SamInputResource = Java.type("htsjdk.samtools.SamInputResource");
var SAMFileWriterFactory = Java.type("htsjdk.samtools.SAMFileWriterFactory");
var samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(java.lang.System.in));
var samWriter = new SAMFileWriterFactory().makeBAMWriter(samReader.getFileHeader(),true,java.lang.System.out);
var iter=samReader.iterator();
var buffer=[];
for(;;)
{
var rec=null;
if(iter.hasNext()) rec=iter.next();
if(rec==null || (buffer.length>0 && !rec.getReadName().equals(buffer[0].getReadName())))
{
var i=0;
for(i=0;i< buffer.length;++i)
{
if(buffer[i].getReadString().match(/^AAAA/)) break;
}
if(i!=buffer.length)
{
for(i=0;i< buffer.length ;++i)
{
samWriter.addAlignment(buffer[i]);
}
}
if(rec==null) break;
buffer=[];
}
buffer.push(rec);
}
samReader.close();
samWriter.close();