Skip to content

Rapid HTS Application Development using javascript and HTSJDK

Pierre Lindenbaum edited this page Nov 10, 2017 · 3 revisions

Since Java SE 8, the Java JDK is shipped with a java-based javascript engine called Oracle Nashorn.

Using the Nashorn engine 'jjs' ( https://docs.oracle.com/javase/8/docs/technotes/tools/unix/jjs.html ) command-line tool and the htsjdk library it's possible to quickly develop a scripted HTS application using the htsjdk classes.

Example

Working with VCF files

The following javascript program converts a VCF file to a BED file.

if(arguments.length!=1)
	{
	java.lang.System.err.println("arguments required : one VCF File ");
	java.lang.System.exit(-1);
	}
//get stdout
var out = java.lang.System.out;
// retrieve java class for java.io.File
var File = Java.type("java.io.File");
// retrieve java class for VCFFileReader
var VCFFileReader = Java.type("htsjdk.variant.vcf.VCFFileReader");
// create a new File object
var vcfFile = new File(arguments[0]);
// open the VCF reader
var vcfFileReader = new VCFFileReader(vcfFile,false);
// get an iterator over the variants
var iter = vcfFileReader.iterator();
// print each variant as a BED file
while(iter.hasNext()) {
	var vc = iter.next();
	out.print(vc.getContig());
	out.print("\t");
	out.print( new java.lang.Integer(vc.getStart()-1));/* prevent floating number */
	out.print("\t");
	out.print(vc.getEnd());
	out.println();
	}
//cleanup
iter.close();
vcfFileReader.close();

Invocation:

jjs -cp `find htsjdk/ -name "*.jar" |tr "\n" ":"` script.js -- input.vcf

Working with BAM files

The following javascript program count the number of records in a set of BAM files:

var out=java.lang.System.out;
var File = Java.type("java.io.File");
var SamReaderFactory = Java.type("htsjdk.samtools.SamReaderFactory");

for(var idx in arguments)
	{
	var n=0;
	var samReader =  SamReaderFactory.makeDefault().open(new File(arguments[idx]));
	var iter=samReader.iterator();
		while(iter.hasNext()) {
		var rec=iter.next();
		n++;
		}

	samReader.close();
	out.println(arguments[idx]+"\t"+n);
	}

invocation:

jjs -cp `find dist build/libs -name "*.jar"  | tr "\n" ":"` script.js -- in1.bam in2.sam

Example: filtering read pairs where at least one read match a regex;

see https://bioinformatics.stackexchange.com/questions/2812/filter-bam-file-for-read-pairs-where-one-or-both-of-the-reads-starts-with-a-give

java -jar /path/to/picard.jar SortSam I=in.bam O=/dev/stdout  SO=queryname |\
jjs -cp /path/to/picard.jar  script.js |\
samtools sort -T TMP -o out.bam - 

with script.js :

var out=java.lang.System.out;
var File = Java.type("java.io.File");
var SamReaderFactory = Java.type("htsjdk.samtools.SamReaderFactory");
var SamInputResource = Java.type("htsjdk.samtools.SamInputResource");
var SAMFileWriterFactory = Java.type("htsjdk.samtools.SAMFileWriterFactory");
var samReader =  SamReaderFactory.makeDefault().open(SamInputResource.of(java.lang.System.in));
var samWriter =  new SAMFileWriterFactory().makeBAMWriter(samReader.getFileHeader(),true,java.lang.System.out);
var iter=samReader.iterator();
var buffer=[];
for(;;)
   		{
		var rec=null;
		if(iter.hasNext()) rec=iter.next();
		if(rec==null || (buffer.length>0 && !rec.getReadName().equals(buffer[0].getReadName())))
			{
			var i=0;
			for(i=0;i< buffer.length;++i)
				{
				if(buffer[i].getReadString().match(/^AAAA/)) break;
				}
			if(i!=buffer.length)
				{
				for(i=0;i< buffer.length ;++i)
					{
					samWriter.addAlignment(buffer[i]);
					}
				}
			if(rec==null) break;
			buffer=[];
			}
		buffer.push(rec);
		}

samReader.close();
samWriter.close();