Skip to content

Commit caeb8ae

Browse files
committed
Add ubu (UNC Bioinformatics Utilities)
1 parent 5926b9e commit caeb8ae

6 files changed

+141
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<?xml version="1.0"?>
2+
<tool_dependency>
3+
<package name="ubu" version="1.2">
4+
<install version="1.0">
5+
<actions>
6+
<action type="download_by_url">https://github.com/mozack/ubu/releases/download/v1.2b/ubu-1.2b-SNAPSHOT-jar-with-dependencies.jar</action>
7+
<action type="move_directory_files">
8+
<source_directory>.</source_directory>
9+
<destination_directory>$INSTALL_DIR</destination_directory>
10+
</action>
11+
<action type="set_environment">
12+
<environment_variable name="UBU_JAR_PATH" action="set_to">$INSTALL_DIR</environment_variable>
13+
<environment_variable name="UBU_JAR" action="set_to">$INSTALL_DIR/ubu-1.2b-SNAPSHOT-jar-with-dependencies.jar</environment_variable>
14+
</action>
15+
</actions>
16+
</install>
17+
<readme>UBU (UNC Bioinformatics Utilities). Installs ubu-1.2b-SNAPSHOT-jar-with-dependencies.jar into the install directory and sets UBU_JAR_PATH (the install directory) and UBU_JAR (the full path to the jar).</readme>
18+
</package>
19+
</tool_dependency>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
@XYZ13-SN749:180:D127FACXX:6:1101:1066:2717/1
2+
GTTNGTTTAAATCCTTGAGGGGTACAGCATCACTCGGATTCTGTGTCN
3+
+
4+
@@B#4=DDHHHHGJHHIIJIJJBHG9EHGHIIJHGGIJIJIJJHHHF#
5+
@XYZ13-SN749:180:D127FACXX:6:1101:1066:12878/1
6+
GCTNGACCTTGATGGACAGACCAATAAGGGTGGCCATGCTGCAGTGCN
7+
+
8+
@@=#4=BDHHHH:AFGFHCCGCHEGIIGIE:EFGH3:?DFIIIG9BF#
9+
@XYZ13-SN749:180:D127FACXX:6:1101:1066:130898/1
10+
CTTNTGCCTCAGCCTCCTGAGTGGCTGGGACTACAGGCACCCGCCACN
11+
+
12+
@@@#4=DDFHHFHIHHIIIII<FHGEGHG9B;DD<BDDDFHH>GHFE#
13+
@XYZ13-SN749:180:D127FACXX:6:1101:1067:49195/1
14+
GGCNTGTTTCGTTCCAACCTCTTGACCCAGGACAACGGCATTCTGACN
15+
+
16+
=<?#4=22CDD<ACE:CEFABFBFEFE@EEEIAD@?0:DDD9B8)8B#
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
@XYZ13-SN749:180:D127FACXX:6:1101:1066:2717/2
2+
TTGGGCAAGAGATGCGCTTATGTATATAAAGCAAAGAACAACACAGTN
3+
+
4+
BCCFFFFDHFGHHIJJJGHGHIFHHGEGIJIIJIIJJIJJIJJJJJG#
5+
@XYZ13-SN749:180:D127FACXX:6:1101:1066:12878/2
6+
CGCTAGAGGAGTTGAACGTAGTAGAGCAGGTGCGGGTTCAGGTTAGCN
7+
+
8+
@@@DDDDFHFHABGHGHEACFA?BEGDGHI8EBFHI6BD?BB=BFFE#
9+
@XYZ13-SN749:180:D127FACXX:6:1101:1066:130898/2
10+
CTGCAATCCCAGCACTTTGGGGGGCCGAGGCGGGCGGGGCACCAGGTN
11+
+
12+
@@BFFFFFHBFHHHIFBBIGHIGDBBBDDDDDB@##############
13+
@XYZ13-SN749:180:D127FACXX:6:1101:1067:49195/2
14+
CGCCCGCTCAATGTGGTCGTTGCCTTCGGCCTTGAGCTGAACGTGGTN
15+
+
16+
;8;7;??A:)<DFC?C+2<CG;8?F@B))60BG0B9?GI<H@B=FG##

ubu/test-data/sam2fastq_test.sam

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
@HD VN:1.0 SO:unsorted
2+
@SQ SN:chr1 LN:249250621
3+
@SQ SN:chr10 LN:135534747
4+
@SQ SN:chr11 LN:135006516
5+
@SQ SN:chr12 LN:133851895
6+
@SQ SN:chr13 LN:115169878
7+
@SQ SN:chr14 LN:107349540
8+
@SQ SN:chr15 LN:102531392
9+
@SQ SN:chr16 LN:90354753
10+
@SQ SN:chr17 LN:81195210
11+
@SQ SN:chr18 LN:78077248
12+
@SQ SN:chr19 LN:59128983
13+
@SQ SN:chr2 LN:243199373
14+
@SQ SN:chr20 LN:63025520
15+
@SQ SN:chr21 LN:48129895
16+
@SQ SN:chr22 LN:51304566
17+
@SQ SN:chr3 LN:198022430
18+
@SQ SN:chr4 LN:191154276
19+
@SQ SN:chr5 LN:180915260
20+
@SQ SN:chr6 LN:171115067
21+
@SQ SN:chr7 LN:159138663
22+
@SQ SN:chr8 LN:146364022
23+
@SQ SN:chr9 LN:141213431
24+
@SQ SN:chrM_rCRS LN:16569
25+
@SQ SN:chrX LN:155270560
26+
@SQ SN:chrY LN:59373566
27+
@RG ID:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG PL:illumina PU:barcode LB:TruSeq SM:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG
28+
XYZ13-SN749:180:D127FACXX:6:1101:1066:2717/1 83 chr3 197681001 63 18M1602N30M = 197678145 -4506 NGACACAGAATCCGAGTGATGCTGTACCCCTCAAGGATTTAAACNAAC #FHHHJJIJIJIGGHJIIHGHE9GHBJJIJIIHHJGHHHHDD=4#B@@ XF:Z:GTAG, RG:Z:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG IH:i:1 HI:i:1 NM:i:2 XS:A:+
29+
XYZ13-SN749:180:D127FACXX:6:1101:1066:2717/2 163 chr3 197678145 64 38M2691N10M = 197681001 4506 TTGGGCAAGAGATGCGCTTATGTATATAAAGCAAAGAACAACACAGTN BCCFFFFDHFGHHIJJJGHGHIFHHGEGIJIIJIIJJIJJIJJJJJG# XF:Z:GTAG, RG:Z:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG IH:i:1 HI:i:1 NM:i:1 XS:A:+
30+
XYZ13-SN749:180:D127FACXX:6:1101:1066:12878/1 99 chr16 66967546 63 48M = 66967636 385 GCTNGACCTTGATGGACAGACCAATAAGGGTGGCCATGCTGCAGTGCN @@=#4=BDHHHH:AFGFHCCGCHEGIIGIE:EFGH3:?DFIIIG9BF# RG:Z:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG IH:i:1 HI:i:1 NM:i:2
31+
XYZ13-SN749:180:D127FACXX:6:1101:1066:12878/2 147 chr16 66967636 64 7M247N41M = 66967546 -385 NGCTAACCTGAACCCGCACCTGCTCTACTACGTTCAACTCCTCTAGCG #EFFB=BB?DB6IHFBE8IHGDGEB?AFCAEHGHGBAHFHFDDDD@@@ XF:Z:CTAC, RG:Z:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG IH:i:1 HI:i:1 NM:i:1 XS:A:-
32+
XYZ13-SN749:180:D127FACXX:6:1101:1066:130898/1 4 * 0 0 * * 0 0 CTTNTGCCTCAGCCTCCTGAGTGGCTGGGACTACAGGCACCCGCCACN @@@#4=DDFHHFHIHHIIIII<FHGEGHG9B;DD<BDDDFHH>GHFE# RG:Z:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG
33+
XYZ13-SN749:180:D127FACXX:6:1101:1066:130898/2 4 * 0 0 * * 0 0 CTGCAATCCCAGCACTTTGGGGGGCCGAGGCGGGCGGGGCACCAGGTN @@BFFFFFHBFHHHIFBBIGHIGDBBBDDDDDB@############## RG:Z:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG
34+
XYZ13-SN749:180:D127FACXX:6:1101:1067:49195/1 355 chr16 14972632 63 48M = 14976463 3879 GGCNTGTTTCGTTCCAACCTCTTGACCCAGGACAACGGCATTCTGACN =<?#4=22CDD<ACE:CEFABFBFEFE@EEEIAD@?0:DDD9B8)8B# RG:Z:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG IH:i:3 HI:i:3 NM:i:2
35+
XYZ13-SN749:180:D127FACXX:6:1101:1067:49195/1 99 chr16 16371281 63 48M = 16375108 3875 GGCNTGTTTCGTTCCAACCTCTTGACCCAGGACAACGGCATTCTGACN =<?#4=22CDD<ACE:CEFABFBFEFE@EEEIAD@?0:DDD9B8)8B# RG:Z:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG IH:i:3 HI:i:1 NM:i:2
36+
XYZ13-SN749:180:D127FACXX:6:1101:1067:49195/1 339 chr16 18528523 63 48M = 18524696 -3875 NGTCAGAATGCCGTTGTCCTGGGTCAAGAGGTTGGAACGAAACANGCC #B8)8B9DDD:0?@DAIEEE@EFEFBFBAFEC:ECA<DDC22=4#?<= RG:Z:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG IH:i:3 HI:i:2 NM:i:2
37+
XYZ13-SN749:180:D127FACXX:6:1101:1067:49195/2 401 chr16 14976463 60 48M = 14972632 -3879 NACCACGTTCAGCTCAAGGCCGAAGGCAACGACCACATTGAGCGGGCG ##GF=B@H<IG?9B0GB06))B@F?8;GC<2+C?CFD<):A??;7;8; RG:Z:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG IH:i:3 HI:i:3 NM:i:2
38+
XYZ13-SN749:180:D127FACXX:6:1101:1067:49195/2 147 chr16 16375108 60 48M = 16371281 -3875 NACCACGTTCAGCTCAAGGCCGAAGGCAACGACCACATTGAGCGGGCG ##GF=B@H<IG?9B0GB06))B@F?8;GC<2+C?CFD<):A??;7;8; RG:Z:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG IH:i:3 HI:i:1 NM:i:2
39+
XYZ13-SN749:180:D127FACXX:6:1101:1067:49195/2 417 chr16 18524696 60 48M = 18528523 3875 CGCCCGCTCAATGTGGTCGTTGCCTTCGGCCTTGAGCTGAACGTGGTN ;8;7;??A:)<DFC?C+2<CG;8?F@B))60BG0B9?GI<H@B=FG## RG:Z:120604_XYZ13-SN749_0180_BD127FACXX_6_GATCAG IH:i:3 HI:i:2 NM:i:2

ubu/tool_dependencies.xml

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
<?xml version="1.0"?>
2+
<tool_dependency>
3+
<package name="ubu" version="1.2">
4+
<repository name="package_ubu_1_2" owner="jjohnson" />
5+
</package>
6+
</tool_dependency>

ubu/ubu_sam2fastq.xml

+45
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
<tool id="ubu_sam2fastq" name="UBU sam2fastq" version="0.1.0">
2+
<description>Convert a SAM/BAM file to FASTQ</description>
3+
4+
<requirements>
5+
<requirement type="package" version="1.2">ubu</requirement>
6+
</requirements>
7+
<stdio>
8+
<exit_code range="1:" />
9+
</stdio>
10+
11+
<command><![CDATA[
12+
java -Xmx1G -jar "\$UBU_JAR" sam2fastq --in '$input' --fastq1 paired1.fastq --fastq2 paired2.fastq --end1 '$end1' --end2 '$end2'
13+
]]></command>
14+
<inputs>
15+
<param name="input" type="data" format="sam,bam" label="Sam or Bam file to convert" />
16+
<param name="end1" type="text" value="/1" label="Id suffix used to identify the first read in a pair." />
17+
<param name="end2" type="text" value="/2" label="Id suffix used to identify the second read in a pair." />
18+
</inputs>
19+
<outputs>
20+
<data name="fastq1" format="fastq" from_work_dir="paired1.fastq" />
21+
<data name="fastq2" format="fastq" from_work_dir="paired2.fastq" />
22+
</outputs>
23+
<tests>
24+
<test>
25+
<param name="input" value="sam2fastq_test.sam" />
26+
<output name="fastq1" file="sam2fastq_expected_paired1.fastq"/>
27+
<output name="fastq2" file="sam2fastq_expected_paired2.fastq"/>
28+
</test>
29+
</tests>
30+
<help><![CDATA[
31+
input SAM/BAM file
32+
Option Description
33+
------ -----------
34+
--in Required input sam or bam file
35+
--fastq1 Required output FASTQ file
36+
--fastq2 Second FASTQ file for paired end
37+
--end1 Id suffix used to identify the first
38+
read in a pair. i.e. /1 (omit this
39+
option to use bit flag)
40+
--end2 Id suffix used to identify the second
41+
read in a pair. i.e. /2 (omit this
42+
option to use bit flag)
43+
44+
]]></help>
45+
</tool>

0 commit comments

Comments
 (0)