diff --git a/pyfaidx/__init__.py b/pyfaidx/__init__.py old mode 100644 new mode 100755 index d9f40f7..8b06d0a --- a/pyfaidx/__init__.py +++ b/pyfaidx/__init__.py @@ -4,10 +4,11 @@ """ from __future__ import division - +import bisect import os import re import string +import struct import sys import warnings from collections import namedtuple @@ -15,7 +16,6 @@ from math import ceil from os.path import getmtime from threading import Lock - from six import PY2, PY3, integer_types, string_types from six.moves import zip_longest @@ -312,6 +312,34 @@ def __len__(self): return self.rlen +class BgzfBlock(namedtuple('BgzfBlock', ['cstart', 'clen', 'ustart', 'ulen'])): + __slots__ = () + + def __getitem__(self, key): + if type(key) == str: + return getattr(self, key) + return tuple.__getitem__(self, key) + + def as_bytes(self): + return struct.pack('<QQ', self.cstart, self.ustart) + + def __lt__(self, other): + if self.ustart < other: + return True + + def __len__(self): + return self.ulen + + @property + def empty(self): + """Check for the EOF marker, which is an empty BGZF block. + https://github.com/biopython/biopython/blob/master/Bio/bgzf.py#L171""" + if self.ulen == '0': + return True + else: + return False + + class Faidx(object): """ A python implementation of samtools faidx FASTA indexing """ @@ -341,29 +369,22 @@ def __init__(self, """ self.filename = filename - if filename.lower().endswith('.bgz') or filename.lower().endswith( - '.gz'): - # Only try to import Bio if we actually need the bgzf reader. - try: - from Bio import bgzf - from Bio import __version__ as bgzf_version - from distutils.version import LooseVersion - if LooseVersion(bgzf_version) < LooseVersion('1.73'): - raise ImportError - except ImportError: - raise ImportError( - "BioPython >= 1.73 must be installed to read block gzip files.") + with open(self.filename, 'rb') as sniffer: + snuff = sniffer.read(4) + if snuff == '\x1f\x8b\x08\x04': + try: + from Bio import bgzf + except ImportError: + raise ImportError( + "BioPython must be installed to read BGZF files.") + else: + self._fasta_opener = bgzf.open + self._bgzf = True + self.gzi_index = OrderedDict() + self.gzi_indexname = filename + '.gzi' else: - self._fasta_opener = bgzf.open - self._bgzf = True - elif filename.lower().endswith('.bz2') or filename.lower().endswith( - '.zip'): - raise UnsupportedCompressionFormat( - "Compressed FASTA is only supported in BGZF format. Use " - "bgzip to compresss your FASTA.") - else: - self._fasta_opener = open - self._bgzf = False + self._fasta_opener = open + self._bgzf = False try: self.file = self._fasta_opener(filename, 'r+b' @@ -396,10 +417,6 @@ def __init__(self, self.duplicate_action = duplicate_action self.as_raw = as_raw self.default_seq = default_seq - if self._bgzf and self.default_seq is not None: - raise FetchError( - "The default_seq argument is not supported with using BGZF compression. Please decompress your FASTA file and try again." - ) if self._bgzf: self.strict_bounds = True else: @@ -435,7 +452,15 @@ def __init__(self, self.read_fai() else: self.read_fai() - + if self._bgzf: + if os.path.exists(self.gzi_indexname) and getmtime(self.gzi_indexname) >= getmtime(self.gzi_indexname): + self.read_gzi() + elif os.path.exists(self.gzi_indexname) and getmtime(self.gzi_indexname) < getmtime(self.gzi_indexname) and not rebuild: + self.read_gzi() + warnings.warn("BGZF Index file {0} is older than FASTA file {1}.".format(self.gzi_indexname, self.filename), RuntimeWarning) + else: + self.build_gzi() + self.write_gzi() except FastaIndexingError: os.remove(self.indexname) self.file.close() @@ -558,8 +583,7 @@ def build_index(self): "Bad sequence name %s at line %s." % (line.rstrip('\n\r'), str(i))) offset += line_blen - thisoffset = fastafile.tell( - ) if self._bgzf else offset + thisoffset = offset else: # check line and advance offset if not blen: blen = line_blen @@ -603,13 +627,75 @@ def build_index(self): % self.indexname) elif isinstance(e, FastaIndexingError): raise e - + + def build_gzi(self): + """ Build the htslib .gzi index format """ + from Bio import bgzf + with open(self.filename, 'rb') as bgzf_file: + self.gzi_index = [] + for i, values in enumerate(bgzf.BgzfBlocks(bgzf_file)): + self.gzi_index.append(BgzfBlock(*values)) + eof = self.gzi_index.pop() + if not eof.empty: + raise IOError("BGZF EOF marker not found. File %s is not a valid BGZF file." % self.filename) + + + def write_gzi(self): + """ Write the on disk format for the htslib .gzi index + https://github.com/samtools/htslib/issues/473""" + with open(self.gzi_indexname, 'wb') as bzi_file: + bzi_file.write(struct.pack('<Q', len(self.gzi_index))) + for block in self.gzi_index: + bzi_file.write(block.as_bytes()) + + def read_gzi(self): + """ Read the on disk format for the htslib .gzi index + https://github.com/samtools/htslib/issues/473""" + from ctypes import c_uint64, sizeof + with open(self.gzi_indexname, 'rb') as bzi_file: + number_of_blocks = struct.unpack('<Q', bzi_file.read(sizeof(c_uint64)))[0] + self.gzi_index = [] + for i in range(number_of_blocks): + cstart, ustart = struct.unpack('<QQ', bzi_file.read(sizeof(c_uint64) * 2)) + if cstart == '' or ustart == '': + raise IndexError("Unexpected end of .gzi file. ") + else: + self.gzi_index.append(BgzfBlock(cstart, None, ustart, None)) + def write_fai(self): with self.lock: with open(self.indexname, 'w') as outfile: for line in self._index_as_string: outfile.write(line) + def build_gzi(self): + """ Build the htslib .gzi index format """ + from Bio import bgzf + with open(self.filename, 'rb') as bgzf_file: + for i, values in enumerate(bgzf.BgzfBlocks(bgzf_file)): + self.gzi_index[i] = BGZFblock(*values) + + def write_gzi(self): + """ Write the on disk format for the htslib .gzi index + https://github.com/samtools/htslib/issues/473""" + with open(self.gzi_indexname, 'wb') as bzi_file: + bzi_file.write(struct.pack('<Q', len(self.gzi_index))) + for block in self.gzi_index.values(): + bzi_file.write(block.as_bytes()) + + def read_gzi(self): + """ Read the on disk format for the htslib .gzi index + https://github.com/samtools/htslib/issues/473""" + from ctypes import c_uint64, sizeof + with open(self.gzi_indexname, 'rb') as bzi_file: + number_of_blocks = struct.unpack('<Q', bzi_file.read(sizeof(c_uint64)))[0] + for i in range(number_of_blocks): + cstart, ustart = struct.unpack('<QQ', bzi_file.read(sizeof(c_uint64) * 2)) + if cstart == '' or ustart == '': + raise IndexError("Unexpected end of .gzi file. ") + else: + self.gzi_index[i] = BGZFblock(cstart, None, ustart, None) + def from_buffer(self, start, end): i_start = start - self.buffer['start'] # want [0, 1) coordinates from [1, 1] coordinates i_end = end - self.buffer['start'] + 1 @@ -663,20 +749,31 @@ def from_file(self, rname, start, end, internals=False): newlines_to_end = int(end / i.lenc * (i.lenb - i.lenc)) if i.lenc else 0 newlines_inside = newlines_to_end - newlines_before seq_blen = newlines_inside + seq_len - bstart = i.offset + newlines_before + start0 + if seq_blen < 0 and self.strict_bounds: raise FetchError("Requested coordinates start={0:n} end={1:n} are " "invalid.\n".format(start, end)) elif end > i.rlen and self.strict_bounds: + if self._bgzf: + raise FetchError("Requested end coordinate {0:n} outside of {1}. " + "strict_bounds=False is incompatible with BGZF compressed files." + "\n".format(end, rname)) raise FetchError("Requested end coordinate {0:n} outside of {1}. " "\n".format(end, rname)) with self.lock: - if self._bgzf: # We can't add to virtual offsets, so we need to read from the beginning of the record and trim the beginning if needed - self.file.seek(i.offset) - chunk = start0 + newlines_before + newlines_inside + seq_len - chunk_seq = self.file.read(chunk).decode() - seq = chunk_seq[start0 + newlines_before:] + bstart = i.offset + newlines_before + start0 # uncompressed offset for the start of requested string + if self._bgzf: + from Bio import bgzf + insertion_i = bisect.bisect_left(self.gzi_index, bstart) + if insertion_i == 0: # bisect_left already returns index to the left if values are the same + start_block = self.gzi_index[insertion_i] + else: + start_block = self.gzi_index[insertion_i - 1] + within_block_offset = start_block.ustart + bstart + print(self.gzi_index) + print(locals()) + self.file.seek(bgzf.make_virtual_offset(start_block.cstart, within_block_offset)) else: self.file.seek(bstart) @@ -1294,7 +1391,38 @@ def get_valid_filename(s): 'chromosome_6.fa' """ s = str(s).strip().replace(' ', '_') - return re.sub(r'(?u)[^-\w.]', '', s) + return re.sub(r'(?u)[^-\w.]', '', s) + +def unpack_gzi_to_blocks(gzi_bytes): + """ Unpacks the bgzip .gzi format to a tuple of + (compressed offset, uncompressed offset) describing + the BGZF compressed FASTA file. + >>> unpack_gzi_to_blocks(b'\x02\x00\x00\x00\x00\x00\x00\x00\x1e(\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x8a1\x00\x00\x00\x00\x00\x00\xff\x1c\x01\x00\x00\x00\x00\x00') + ((10270, 65280), (12682, 72959)) + """ + import struct + + n_bytes = len(gzi_bytes) + gzi_ints = struct.Struct('<%sQ' % str(n_bytes // 8)).unpack(gzi_bytes) # little-endian 64-bit unsigned integers + n_blocks = gzi_ints[0] + + block_offsets = tuple(zip(*[iter(gzi_ints[1:])] * 2)) + return block_offsets + +def pack_blocks_to_gzi(block_offsets): + """ Packs the bgzip .gzi format from a tuple of + (compressed offset, uncompressed offset) describing + the BGZF compressed FASTA file. + >>> pack_blocks_to_gzi(((10270, 65280), (12682, 72959))) + b'\x02\x00\x00\x00\x00\x00\x00\x00\x1e(\x00\x00\x00\x00\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x8a1\x00\x00\x00\x00\x00\x00\xff\x1c\x01\x00\x00\x00\x00\x00' + """ + import struct + from itertools import chain + + n_blocks = len(block_offsets) + gzi_bytes = struct.Struct('<%sQ' % str(n_blocks * 2 + 1)).pack(n_blocks, *chain(*block_offsets)) # little-endian 64-bit unsigned integers + + return gzi_bytes if __name__ == "__main__": diff --git a/tests/data/chr17.hg19.part.fa b/tests/data/chr17.hg19.part.fa deleted file mode 100644 index 962d41c..0000000 --- a/tests/data/chr17.hg19.part.fa +++ /dev/null @@ -1,2 +0,0 @@ ->chr17 -AAGCTTCTCACCCTGTTCCTGCATAGATAATTGCATGACAATTGCCTTGTCCCTGCTGAATGTGCTCTGGGGTCTCTGGGGTCTCACCCACGACCAACTCCCTGGGCCTGGCACCAGGGAGCTTAACAAACATCTGTCCAGCGAATACCTGCATCCCTAGAAGTGAAGCCACCGCCCAAAGACACGCCCATGTCCAGCTTAACCTGCATCCCTAGAAGTGAAGGCACCGCCCAAAGACACGCCCATGTCCAGCTTATTCTGCCCAGTTCCTCTCCAGAAAGGCTGCATGGTTGACACACAGTGcctgcgacaaagctgaatgctatcatttaaaaactccttgctggtttgagaggcagaaaatgatatctcatagttgctttactttgcatattttAAAATTGTGACTTTCATGGCATAAATAATACTGGTTTATTACAGAAGCACTAGAAAATGCATGTGGACAAAAGTTGGGATTAGGAGagagaaatgaagacatatgtccacacaaaaacctgttcattgcagctttctaccatcaccaaaaattgcaaacaaccacacgcccttcaactggggaactcatcaacaacaaacttgtggtttacccacacaatggaagaccacttagcaacaaaaaggaccaaactcctggtacatgcaactgacagatgaatctcaaacgcattcctccgtgtgaaagaagccggactcacagggcaacacactatctgactgtttcatgggaaagtctggaaacggcaacaccattgagacagaaaacaggtgagtggttgcctggggccagggaactttctggggtcatattctctgtgttgattctggtggtggaaacaagactgtCccagcctgggtgatacagcgagaccccatctctaccaaaaaattaaaaattagctgggcatggtggtgcatgcctgtagtcccagctattcacagtgctgaggtgggaagatgcttgagcccaggagttcaaggctgcaatgagctatgattgcgccactgcactttggcctggacaacagagcaaaaccctgtctctaaaaaaagaaaagaaaagaaaaaCTCACTGGATATGAATGATAcaggttgaggatccattatctgaaatgcttggaccagatgttttgaattttggattttttcatattttgtaatctttgcagtatatttaccagttcagcatccctaactcaaaaattcaaaaatctgaaatcccaaacgcgccaataagcattccctttgagcgtcatgtcggtgcttggaatgtttggggttttggatttacagctttgggacgctcaacctgTACCTCAATAAACCTGATTTTAAAAAAGTTTGGGGGGATTCCCCTAAGCCCGCCACCCGGAGACAGCGGATTTCCTTAGTTACTTACTATGCTCCTTGGCCATTTCTCTAGGTATTGGTATATTGTGTCTGCTGTGAACTGTCCTTGGCCTGTTTGGTGACGGGTGAGGAGCAGGGACAGAAGGGTCCTGCGTGCCCTGCCTTCACAAGCCCCTGGAAGGAAAGTTGTTTTGGGATCTCTGCACCCTCAGCCTGGACAACTTGTGCCCATCTGGTGACCCCTCACTCAGCCACCAGACTTCCACGACAGGCTCCAGCCTCGGCACCTTCAGCCATGGACAGTTCCGCCAGCGTTGCCCTCTGTTCTGCTGTTTTCTCTACCAGAAGTGCCCTTCCCTCCTCACCTGACCACTCTGGGGAAATCCCTCAGCACCCTCCCTGAGCATACCCTACTCTGGCACAAGCCCACCCTGCAAAGCCCCTGAGGCCCGCCCTGTGGCGTCTCTCCCTCCCTTGCTGTCAGGACAGTGGTCCTGGCCACCGGGGCTCACGGAGCCGCCCTGTGCCGTGTACCTCTGAGCCCTCTGCACAGTGCCTTCTGCTTGCCTGTGGCTTTGAGAAGAaaccccttctggttatacataagacagccagagaagggagttgcccagggtggcacagcacgttgctgccagTTACTGCCATTTTCACGGGCATGAAATGGAGATAACAACAGGAGCGACCGCACAGGCTGCTGAGCGCGTCACACGCAGCCATCGCGCAGCTCAGGGATATTACGTGTAACTCGACATGTCAGCGATTGTCACAGGCACTGCTACTCCTGGGGTTTTCCATCAAACCCTCAAGAGCTGGGCCTGGGGTCAACTTCCGGCCTGGGGAAACTGGGGCAAGTATCACCAGAGATGAGCTTTATAAAAATAATGGTGCTAgctgggcatggtggcttgcacctgtaatcccagcactttgggaggccgagctaggaggatcgtttgagtccagcagtttgagaccagcctggccaatacggcaaaacccagtctctacaaaaaatacaaaaaacaactagccaggcgtggtggtgcacacctgtagtcccagctactcaggaggctgagggggaaggactgcttgagcccaggagtttgaggctgctgtgagctgtgatcgcatcactgcattccagcccggtgacagagtgagtcactgtctcaaaaaagaaaggaagaaataaagaaaacaaATAAAAATAATAGTGCAGACAAAAGGCCTTGACCCATCTAGCTTTGGCCCTCAGCATCAACCGCTAGATACGTCCCTCCCTTTCTTCTGGGGCACAGGTCACACTCTCTTCCAGGTCTAGGATGCAGCTGAGGGGTGCCCCTCTTACCATCTAATCTGTGCCCTTATTTCCTCTGCTTTAGTGAGGAAGAGGCCCCTGGTCCATGAAGGGGCCTTTCAGAGACGGGGACCCCTGAGGAGCCCCGAGCAGCAGCCGTCGTGTCTCACCCAGGGTGTCTGAAACAGATGTGGAGGTCTCGGGTGAGGCGTGGCTCAGATACAGGGAGTGGCCCACAGCTCGGCCTGTCTTTGAAAGGCCACGTGACCTGGCCCACGGCTGGCAGGTGGGACCCAGCTGCAGGGGTCCAGCAGCACCCACAGCAGCCACCTGTGGCAGGGAGGAGCTTGTGGTACAGTGGACAGGCCCTGCCCAGATGGCCCCCCGCCTGCCTGTGGAAGTTGACCAGACCATCTGTCACAGCAGGTAAGACTCTGCTTTCTGGGCAACCCAGCAGGTGACCCTGGAATTCCTGTCCATCTGGCAGGTGGGCATTGAAACTGGTTTAAAAATGTCACACCATAggccgggcacagtggctcacgcctgtaatcccagccctttgggaggccagggtgggtggatcacttgaggtcaggagttcaagaccagcctggccaacatggtgaaaccccgtctactaaaaatacaaaaattagcctggcgtggtggcgcatgcctgtaatcccagctacttgggaagctgagggatgagaactgcttgaacctgggaggcagacgttgcagtgagctgagatcacgccactgcactccagcctgggcaacagagtaagactctgtctcaaaaaaaaaaaaaTCACACCATTTTGGCTTCAGATTGCATATCCTCCTGCAAGGATATATACGCGTGAAATTCAAGTCAATGACAAATCAGAAGAAAAAACATATATATACGCAAACCAGTATCCTACTGTGTGTGTCGTTTGTTGTGTTTTCGACAGCTGTCCGTGTTATAATAATTCctctagttcaaatttattcatttttaacttcatagtaccacattctacacactgcccatgtcccctcaagcttcccctggctcctgcaaccacaaatctactctctgcctctgtgggttgacctattctggacacgtcatagaaatagagtcctgcaacacgtggccgtctgtgtctggcttctctcgcttagcatcttgtttccaaggtcctcccacagtgtagcatgcacctgctacactccttcttagggctgatattccaCGCACCTGCTACACTCCTTCTTATGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACACACCCGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCCGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTAGGGCTGATATTCCACGCACCTGCTACACTCCTTCTTATGACTGATATTCCACGCACCtgctacactccttcttagggctgatattccactgcagggacagacttcatttgtgtatccattcatcagtggatggacacttggggtgtttccacttttggctgttgtggatagtgctgctatgaacattcctgcacaagttttaggatggacatgtttttcttatctcttgggtatataacaaggagtggaattgccagatcaaatggtgattctgtgtttaactttctgaggaactatcagctgcttcccaaagtggccatcccattattctcaTAttatttttatttgtttattatattttgagagtgtctcgctctgtcaccctggctggagtgcagtggtgtgatctcggctcactgcaatctccacctcccaggttgaagtgattttcctgcctcagcctcccaagtagctgcgattacaggcgcccgccaccacacccagctaatttttttatttttagtagagacgggtttccccatattggccaagctggtctcaaactcctgacctcaggtgatccgtgcgcctcggcctcccaaagtactgggattacaagcacgagccactgcactcagccTAATTTATTACTATTTTTAACTGTAGAGACAAGGTCTCAGTATGTTGCCAAGGCTGGTctcaaattcctgggttcaagcaatcctctcaagtagttaggactacagggacatgccactacaccaggctaatttttaatttttttatagagatggaggtctcactatgttgcccaggatggtctcaaactcctagcctcaagcaatcctcctgccttggcctcccaaagtgttcagaatgtaagtgtaactcactgcacctgCCACATACAATTTTTAAAGTGACAGAAATATGATCATGGCCATGGGAATGGTGCCCCTAGAGCTGTGCCACGAGGAGTACTGGCCTCTTCATGGTGCCAAGATGTCCCTGAGGCCTTAGTCACCTGGGTCCTTGGTGTCCCCTAGGTCAGGGCCATCCCTCTGTCATTCCCCCTCCCTGAAGCACCTGCCCCTCCTTTCTGCTGAACTAAATTCCTCCCCAAGTCTCAGTTTTCCAGAGTCTCCCTGTGAGCTCACACTCATACCTACCTAGTTTCTGAAGAGCCCCGAGCACTGACGGTAGTCACTGTGGCACCTGTAGCACCCTCTCCAAAGGGTCGCCAGCTCCTGCCTGGCTCTCAGAGCTACACAGCCTCTCCTGACCAGGCTCACAGCTCCACAGCTCTGCGGCCCAGGCCACCTGGCATGGCCCCCTGAGTTAGTCTCTCCCCAGGCCCACCATCAGCCCTGTTGGTAGAGCTGGGTGGACTCTTATCCGCATCTGTAGCACCATTATAGGGCTGGGCAAATGTGGGCAGTTGCAAAGGCCTGGCAGAGTTCCTCTGCATCCTCCCCCAGCCTCCTGGCTACCCCCGGCACTGGCCCCGCCTTCTGTCCCCTCCTGCAACTCATGGCCCCTCCTGGGCCCCTCAGTCACAGAGAGGCCTGGACATAGCATCAGGTGATAACtaatacctgcatgaccctgggaagccactcagcttctctgtccctcagtttccccatctgtgaaatgggctggccatgcttaacccctggagttgCCAAGGTAGCCCATCAGGGAACACAGCGCCCCTGTACCTCAGGCACTCCCTGGGTGCCTGCCACCTGGGACCACGGAGCCGGCACACGGACCCCCGTCCTTGGAGGTGAAGACGTGGCAGGTGGTCACGCGCACGGCACACTCACGTTTTCACGTAGGGGTCCGAGTAGCCGTTGGCGTCCATGGCGGCCAGGTGGGCGCACCGCACGATGCCTACCAGCAGGCCTTGCTTCTGTGAGCTGTACTTGAGGGAGATGAGGATGCGGCCCCGCTCCTCCAGGGACTTGTCTTCAGTCTTGTCCACCTGTTGGACGGGACGGTCACTCAGTCCTCACCTGCTCCCACCCCTCTCTTGGCCGTCCTTGGCCTCCTCTTCCTGAGCCCGCCCATCCGGCTGCTGCAGCCGGGCCTGGTCACGGTCCCTGGTGAGTGGCCTCACTGTGAACTCACAGCCCTTCTGTCATCTCTTCCCTCCCAAGGGCTCTTCCAGGGCTGGATCTGACCCACACCCTCCCTGTTCTTGGCTGCATGTGGCCTACGGTGGCCTTCACAGCCCAGCAAGGGCCAGCCcaggggttggcaactgcagcccgggggccagatcaggcccgacgcctggctctgtgtgggatgtgagctaagcatggctcttacccttatcttaaacaatttctttttcaaaaaaatagagacaggggtctcactattttgcccaggctgatctcaaactcctgggctcaagggatcttcccaccttggcctcccaaagtgctgggattacagacatgagccaccgtgcccagctggttcttactttttggaatggcagaaagaaaaatgaaatgaaaaatattttgtgacacatgaaaatgacatgaaattcacacttcagcttccattaacaccgtgttgttggaacgcagccttgccagctccgtgatgcttctctatggctgcttttgccctaaggacagagctgcatggtggccacagattgcgaggcacacagagcctaacattagcgctaagcggcccttcacggGTCTGCAGCCCCCGGCCTGGCGCGCTCGGCTTCCGCGGACGGTCTTCCACCAGGTCCTCCTCACAGCCCGCCCGGACTTTCCCTTGGCTTTGAAGCCGGGTCCCACCTCCACACCTTTGCGCACCCTTAATCCACAGCTCCAGCACCCTCCTCTGACCTCCTCAGTGTTCACCGTTGATAAATTCCTGGTCCTTCTGGGCCTGGGGGGCTTTTCTGACCCTCAGGGTGGGTTGGCCGTCCCCTCTGCGCGCCCACTGCCTCGGGTTTATCCTGTCATGGCCTGGTGTGGCCCCCTGCTTCCTCAGGGCTCTGCCCCAGCCTCTAAGTTCCTTGAAGTTGGAACTCTACCTGTAAAttttgattttgacagagtctgactcttgtcacccaggctggagtgcagtggcatgatcttggctcactacaacctctgcctcccgggtttaagcgattctcctgcctcagccttctgagtagctgggactacagtctaatttttctattttttggtagagacagggttttgccatgtctgccaggctggtctcgaattcctgacctcaagtgatccacccacctcagcctcctgaagtgctgggattacaggtatgagccaccgcatccagATAATACCTGCGTCATCTTTAATACCTCAGGGCTGGGCAAGGCCCTGGCACTAAGGGCCCACTGGAGGCTTGTACTAGTGAAGGCAGGAATGGAGGGACAGATGGTCCCCTGTGCGTTCAACCTCATACAAGCAACTCCAGCCTGGAGGTTCAAAGAAAGGCAGAAAGGTCTGCCCATGACAATGGAGCCTGGTGGATGGAAAAGGGTCACCGTGGCCCCAGTTCCCTGAAGGTGCTACCTGGAGCCTCTCAGGGTGCTGGATGTGGCTCCCTCAGGAGAACCCCGAAGACAGAGTTCTGGTACATTCCTCACCCTGGAGAAGCTGGGAGCCAAGTAGGGAGCCCATCCAGTGCCTTCCCTGCCTCAGCAGCTGCCAGTGCCTGCCTCTCAAGGCAGATCTCAGCTCCAGGCCTCCCCATCCCCAGCCAGCCTGATGCTTCTCCATTCCTTGCCCTCCCGAGACTACCCGGCTTCTTCCTTCTGGCTGCTGCACACCCCAAACCCTCTCTCCTAGGCCCTCAGCCCCCCCAGACAAACCCAGCTTGAGCCCACCCACACACGCCCAGCTTGAGCCCTGTGTCCCCCTGGATCCCCTTAGGACTCTACCAGGCTCCTCTCCCAAGCAGCCCCAGCCACCTCCTTTTGCCCACAACCGTGCCCTGCCTAACACCAAAGTGAGTCCCTCTGGCTACATCTAACCCTTTCTGGAGATGAGACTCCTGTGAGGTGAGCAGGGATGTTGAGcagggaagacatacatgccccaccagccaccttcgccacctcccacgcccagagcagacatggctaatcgatcctagcatttgccctaggctccaatgccacctcagaatccttttcaacacagtgctcaggcagccattcctggtggatcaggccaggcctgcgagatCTCGCTATCTGCCGCTCAGGCAGAGCTTTGTGTGTGAGGGCCTTGATGCCCTGTGCTGCTTGTGTCAGTGTGTGTGGTGTGAAAATTAAACAATATAAACTAGCAGGggccaggcacagtgactcatgcctgtaaccccagcactttgggaggccaaggcgggcagatcacctgaggtcagcagctcaggaccagcctggccaacatggtgaaaccccatctctactaaaaatacaaaaattagccgggtgtggtggcgcatgcctgtaatcccagctattgggaggctgaggcagaagaatcacttgaacccgggaggcggaggttgaagtaagccgacattgcgccactgcactccagcctgggcgacatagactccatctcataaaaaaaaaaaaaaaacaaCCAACCAGCAGGCATATTTTTAGCTCTTTTTTCAGGGGTGGGACATTGTACTTCGGGTTGTTTCATATGAAGACCACTGGGTCTTGCTCAGTATTGACTTAAAACAGATAATGTTCGCAGACTGTAAATTCTAAAACCTACCGCCAGAggcctggcacaggggctcatgcctgtaatcccagcactttgggaggccgaggcaggaggatcactggaggtcaggagtttgagacttgcctggccaacatggcgaacccccgtctctactaaaaataccaaaattagccaggcgtggtggcgcacacctgtaatcccagcactttgggaggctgaggcaggtggatcattcaaggtcaggagtttgagacacctggacaatatggtgaaaccccatctctaccaaatatacaaaaattaaccaggcgtggcggcacacgcctgtagtcccagctactcgggaggctgaggcatgagaattgtttgaactcaggaggtagaggttgcagtgaacagagattttgccactgaactccagcctggatgacagagcaagactcagtctcaaaaaataataataataaaaGTACCACCAGAATGTGGCTGTACTGTCAGGGGTGCATCCCCAGCTGCACTCCTGCGGTCACTGTGAGTCCCTGAACGGCACCAATGGGCCGGTAGCGCATCCAGCAACGCCCTGATCATGGCcacgcacagggacgcacatgctttcacgaacgcacaccacacatgtggacacacacactgtcgcacacagacacgtactgacatatgctcttacacacaattcacacacgagcacacacacacacacgctgacaccccacgtacatacccacGTGGTTGTTTGTTTATGCCAGTGATGAAAACTCAGGAACACTAAGGCAGGGCTGGTGTTGCtttttttttttttttttgagacagagtcttgctcttgctcttgtcacccaggctggagtgcaacggtgcaatctcggctcactgcaacctccgcctctcaggttcaagcgattctcctgcctcagcctcctgagtagctgggattacaggcatgcacccccacacccggctaatttttttatttttagcagagacggggtttcgccatgttggccaggctctctcgaactcttgacctcatgacccacctgccttggcctcctaagatgttgCCTTTCTTAAGTGACATAGACCATGTGGAAAAACCGGGTTACCTGTGGTTAGTGACTAACAATAAAACAGGAAAGGTTATATCCATCACACAAATGTCTGAGGGGGAGAGAATGTGACAAGGAATAAAATTGGATCAAATTCTGCAAAAGTAACTGGGATTCTGGGAAGAAGCCGTGGCCTCAGGCTGACTCGCCCCCGGGGCTTGACTTGGGCTAAGCTCGAGGTGAGTCCACGTCCCCGGGCCCCACTGGGGCTGGGTACACTGGGGACAGCCGCCGGGCTCTGTCCTCCCAAACTTGCCCCTTGCCCAGTCCTCTTAGGGGGACAACGTGCCATCGAGGGGACCATGCCTCCGCCTGTGTCCTGAACGCTGGGAGGCTGAGGCCCCAGGATTTCTCTTGACCCCAGTGGCACGGGGGACTCCTGGCTTCACCAGCCCTATGAACCAGGTGAAGGTGAGGCCATAGACAAGGGAGGATGGGGGAGGGAAGAGGGACATAGAGACCAAGACTCAGAGGGCGTAGCTGCTGGAGCAGGCCGAGGGCAAATCTGTTCTGACATAACGTTGAGACAAATGCCATTTCTAGGAAAGGAtactctgctgtctcctctgcgtatctcacaggcactcaggtctaacatgttccaagcgtgctccttgcgcgtcctgtccacccgtggtccctgctgagtcctctgagtgcagcaaacagcccctcaggcttcagtggctcaggccccaaacctcggatctgcccttccctcacccaaggacgtcctatctgctgcctgcacatctggttcagaatcaagccctcctaccgctgccaaggtcaggccagggttgtgcccatctctccccatctccccaggcctcctgccctctcctccctcttcttcaagccatcctgagcccaggccagcgagcctggtaaaatgtcatcccccatgatccctcagcttagcaccctcccgtggccactcagagtgaaagccagggtccttcctcacctccacccccttgactctccatgctcacctccccggtctcccctcccctctcactctgcccctcATGAGTCCCATCACAGGCAGGAAGTTctgccttcccagcacctgccaccgagccaggtacacagcaggtgctcaatcaatCCTCTCACCGGCAGCTGCTTCTCCAGGCAGATGCTGAAGGTCTTGGTGTGGTTGGGTTTCAGCTTCTTCAGGGGCACACGTGTCTCCCCGATGAACTCATTGTGCCGGAATTTGTCCTCGTCACACACAGAGATCCTAGAGGGGGCGGTGGTGAGGGGCACAGCCAGTGCCTCAGACGCACTGGGCATGGTGGAGGTGTGCGCAGGTAGGGCCAGCCCTGGCTTCTCCTGCCCCAAGCCCTGCCCTGGTCTGGGGTGGGAGACGCACAAGATGCCTGGGCCCTGACAGGGGCAGAGTGTGGCACGATATCAGGCACTGTCCTCATGGACAAGTGTCCTCAGGTTGGAAGAGGGGACAGGAGAAGGCAGAACCAGTGCCAGGAGTAGCCAGGAGGCTGGGAGAGCCGGTTCTCTGGAGGGAACCACCTCCAGCACCCTGAGAGGCCCCAGGAAGCACCTTCAGGGGACTGGGGCCAGGGTGACCCTTTTCCACCCACCAGGCTCCATTGGCGTGGGCAGACCTTTCTGCCTTTCTTCAAGCCCCCAAGCTGGAGTTGCTGGCATGAGGTTGAACCCACAGGGGACCATCTGACCCTCCAGTCCTGCCTTCATTAGGAAAAGCCGGGTGGGAGTAGGGGTTGGGGAGGGAGCAGGCGGCCTGGGACCCTCACCCACCGCAGGGTCTTGCGGATCATGTCTTCATCTGTGATCCCGTAGTAAGTGAGGGTCTCGTTCCATGTGGGGTTCAGAGTGTTACGGAGAGTTTTTGTTCTGAGCTTATTTGCCTGGAGAAGAGAAAAATGATCTTATTAGCATCAAAGTGtgtatcaaacagaacaatggcccccagagatggccacgtgctcatcttggagcctgtgaatgtgttatcaacatggccaagtggactgaggctgcaggtggacttagggttggtaatgagctgacattagaatagggagattatcctagattgttgggtggcccaatgtggtcacagggttcttaaaagcagaagaatggacagagaagacagtcagggacgtcaccaaggaagggggccagagagatgcaatggggcccgctgtgaaggtggaggaaggggccacaagcccaggagggccgatggcctctagaagctggaaggagctaggaaactgtgggctccccgggctccagaaggaatgcagccttgccgacaccttgattttagcccagagagagaccactgctggacttctaacctgcagagcagtccgagagtaaacgcgctgctttaagccacgaagtttgtggtcatttgttgcagcagccgtaggaggctcatccaAGGAGGACCCCACCTCCAGCCCGATGCCACGGGGTAGGTTCTGCAGGAGGCCTGTGTGGAACTGGAGTCTCCTTCCCAGGGCATTCCCTGCCTTTGTGGACCGTCCCCTCTGCCTGGAACCCATCCCGTCCTTGGGTCTGCCTCGGGGGTGGCCTCTCCGAGCTGGAATATTGCTGCCACTCCCTCCTCCTGTGCCAGCGGCTCTGAGCTCCTTGTCTGAATCGGTCTGATGCCTGCCACACCCGgcccaggctgggaggtcagacagcctgggcttccaattccagctctgtggcagcatcaggttccccactgtggaaagggcacaggaatccctctctcccattgctttcaggagctgtttgtaaggagactgctctttataaaacactagggaaagtcctggGGACTTCCTACAACTCGGCAGCCATGCACTGCCGGCTCCAGTCCCACAAATGAAGGGTCACTGAGCACACTTCCCTGGTCATACTCGCCCTCGGCCCTCATATCCCTGAGCCCTTCTTGCAGCATAAGGGCATCAAGACCCTGTGTGGGGAGCCCATTTCTTCCCCAGGAGTAGGTGGTGGGGCACTGCCATTTCTCCTACAGTCCTGCCTTCCCTAGAGAAGGGGGAAGGGCCCCTTCTGGGGTGGTCCTCCTGGGCTGCTGTGGGCCCCAGGCCTACCCATTTAGGCCTCCACGTAAACTCAGACTCTGCCCCATGAAATTAAAAATAAAACAGCACTGAAGTGTAGGATGCACAGAAGGAAGTCTAATTAAGTTCTGACTTTCCATGGCTCAAGGTCACCTTGATGCTTTTATTTAACAGCAATTTTTCTCTCTCCTGTCCGAGATGCACCTGTTGTGCTGGTGCCCTGAGCACATGTGTGGTCGGACGCTTGGGTCATCCGGCGTAGGAATGAGGGGCAGATTctgcccacttcatcccccttgcggagagtcagggaacagcccatcaacacgttagatgctctgagaagtcctgtggtacagaaacctgtttaacaatgtttccccatgttatttaaccacagagtcctttgtttGCACCTAATAGTAACTTCCAGCTGAGAATGCTTCTTATAAAATGCTGGCCTGGAAATTCTCCAGGGGTTTCCCCAGATCAGCAGGTAGGTAACGCTGACTCGTGAGGTCCCCAGGAGGCAACAGGGTGTGGGGCAGACTGGGTGCTCTGTGTGCAGTTGAATGGCTGAGGTCCCATCAGCTTGCTTGGACTCTGAGGGGAGTGGCAGCTGTGGGCCTCCCTGGGTGCCCGCAGCCCAGTCCAGGCCCCAAAGCAAAAGGACCAGAGTGCATTGCCTGAGGTGGTGGGCGGGTGCAGCTGGTGGGCAGGCCTGGGTGGAACCCCTCACCTTACTGGCTCCTGGCAGCAGGTGCAGCTTGACGTAGGGGTCTGCCAGCCCATTGTGGTCCATTGGCTTCAGGCCCTGGGCAGAGAAGAGCAAACGGTGTGAACTGGAAATCGGGGACATGGAACTGACAGGGCCTGCAGATGCCCTTGTCCCCTGGGTCTCCTGGCTGGGCCCATTCATGGCCCCTTTCACAGAAGCCCCCGGGCCCCACACCTCCAGATGGAGGGAGTGAGGATGGCTCAACGCTGATGCAATCTCCCCTCCCCAAAACATCCTGGCCCAGGGCAGCTGTCATGGGGCCTCTGTCAGGACAGACTTTCTCTTGTCCTCATCCTGGAAGCCAGGTGGCTCCCCCTTCTCTGAGCCTCCCTCTCCACCCTGCTTTGCTCAGGCCCTGCCATCATCCTGACTCCCGTCTCCCATCTACCAAGCGTGGCCTCCTGAGTGGCCATGCCCCTCACTGACCCTAAATCCACATAACCCCGGGACACAGTCTGAGACTAGGCCCCATTTCTGGCCATATTTGCCACAACTCCCAGTCAGTCCACAGAGATCACACAAGCAGACTGGCCACAGAGGGTCCCTCCACAAGATGGCACCCCTCTCTCACTCTGGTTCTTTCAGAAAACCCAGCCACGCGTGCACAGCCAGAGGCACACAGAAGCATGGACAGAGAGGGGCTTTGCTGACCTCAGCAGGCTCTTGTCTCTGAAAATTGCATTTGTTTCCTTCTGGGATGGTGCACAGGGATGCAGGGAGGCAGCTGTGGCATCTGTGGGGGTGCAGCCGGCAACCCCAGGATAGTATTCAAAGGGCTTCTACCCAGCCGGGTCAGGGAGGCAGCTCGGCGGCTCACACCAGGCCCATCTCCAAGGGGAGGCTGGGGCTCCCTCCCAGGCCCACCCCACCTCGGTTTGACGGAGCTCTGGGGGATGCGGCAGGGGCTTCCAACCACCTACTTCCCTGCcactggcaaagggcagaggtttgggggcctggctggcttgggctccaggctcagctcctgttcaccagctgggaagtggatgagttactttgcacgctaagcctcagctttatcatctgttaaaggggttggccgggcacggtggctcacgcctgtaatcccagcgcttgagcccaggagttcaagaccagcctgggcaacacagtaagaccctgtctcttcaaaaaactaaaaaattagctgggtggggtggcatgtgcctgtggttccagcaactagagaagctgaggtgggaggatcgcttgaacctgggacacggaggctgcagtgaaccatgatcgcaccactgcactccagcctgggtgacagagtgagaccctgtctcaagtttaaaaaaaaaaaaaaaaaaagggcgggggtgtaatactcccaccttcctagggctggagtgagagggagggagactgtgtggacacagggcctggcaggcagGGTTTGCTCCCTTCCCTGCACGCCAGGCCCCCGTGAGACGCCGGAGAAGCGGTTGAGGTCTGCTGGGTGCATCTGAGCTGTGCCCTGCACTGTGGGGCTGCCTCTGGGACAGGGCCTGGCTCAGTGGCCCAACACGTGCTCAAGGTGACAAGGCTGCTCTTCCAAACCCAACTCCCTCACCCTGCCTTGCACCTTTCCCACCAAGTGCCTGAAGTGCAGAGTTCTGGGAATCTGGTGGCCTGGCCTGACCAGGCGGTGCCTGACTGCTGTGTCTTATTCTTACAGTCCTTATACATTTGTTTGTTGTAGCTTAAAATGTTTCCATTTGGGCCACGTTTCCTGACACGTTTCACTTACAAACCTTTGGGCCACCACATGTTCTGGCGTATGTCTTTCCAGTGGCTCTGGTGGTCCCACCTCCGTACCCCGAGACACCCGTGGGAGCTAGGCTGGGTCGAGGGGCGCTGTCATGCTGGGACGGGAGTGATGCCTGCTCTGTCCTTCAGACCCCGGGACTGTGACGAGGCCCCTGCTGCTGCTCTCTACTCCTCCACCCGGCAGTTCCCTGAGCACCCTGGGCAGCCACACATTTCGCCAAAAGCAAAAGGACCAGCAGGCACTGCCTGAAGCCCTGTCACTGTGCTGCCACCAACTTCTGTGCCCAAACAGGCAGCTTCCCTGGTCCTGACCCGGGGTTCCGCCAGTGCCTCCACCTTCTGTGGGGCTGGCCTCCCACGGAGCCTGACCTCTGTCCACGGAAGGGGAAGGTCGGGAGGCTGTTTCCAGGGCAGGGAGCTGCTAGTGGGGCCCTTGGGCACATGCTCCCCAGCTTGGGAGTTGGCAAGAGACATAAACTGATCCCATGGTTTAGGGGTGGGCCTCACGGGAGGtgacgtggtttggcggtgtccccacccaagtctcatctggagttgtactcccataattcctacaggttgtgggaaggacccagcgggagataactgaatcacaaggggaggtttctcccagactgttcttgacgtagtgaatacgtctcacgagatctgatgatctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACACTCTTCTTGACGTAGTGAATACGTCTCACGAGATCTCATGGTCTGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctgatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctgatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctgatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActgttcttgaggtagtgaatacgtctcacgagatctcatggtctGACActcttcttgaggtagtgaatacatctcacgagatctgatggcctgataaggggaaacccgttttccttggctctcattctctctcttgccaccaaacatgtgagaagtgcctttcactttcagccataactgtgaggtcttctcagccacgtgtaacggcaagtccaataaacctctttcctttataaattacacagtctcaggtatgtctttatcggcagcatgaaaatggactaatacaGGGAGGATGGGTGGAAGCGGCCCCGGGGGAGGCCCTGGCTGGTACTGGCACTGAGGGAAGAGATGGGGGGTCTGGCTTTGAGAGGAGAGCTTCTCCCCAAAAACCTAGCCCTGCCCCGCCCTGGGCCTCTCAGAGGCTGTTGCTGGTGAAGTGTTCGGAAGAGGAGCTTTCTAGTCtgaagtatcattcagcctgaaaaggaagttttgacacgtgctgcaatggggatgaagcctgaagacattctgcggagtgaaagaaggcagactcaaaaggacagatcccggggactgcagactcaaaaggacagataccgggagactgcacttacatgaggtccctagaatagtcaaatccatagagaaggaagccaaatggcagcctccccaggggccaggggaggaggaagggggagctgttgtttaatgggtccagtttagttttgcaagctgaagagggctctggagatgggtttcacagcagtgtgaaggtagaaggtacttagcacaactgaactgtacgctaaaaatggttgagCGCTGAGGGAAGAAGTAAAAAAAAGTGGTTGAGGTGGGAAATGTATATCTGTGTATTTTACCACAATAAAAATAAAAAGTCTCCCAGAACTGGTAgtgccaggggccacgtgttaactcatttaatgctcacaacaggcatgtagggcagggacaaccaaccctatttacagatgggcaaactgagactGACCCTTATAAGGGGGGACAAGCAAGGGTGCACCCCAGGGTGTCCAGCCCCCACCCTGGCCCTCCAGAGGCCAGCCCTCCTTCAGCTCACCCACCCTGGGCCCCTCCCCACACCCCAGCCCAGAGCCCCAGCTCTTCCCCAGCCTGCACCACCCCTTCCCTACAGAACTGGATTTACACAGAGAAGGAACTGGGCCTCCCACCCCCACTTCTGATACCTGAGGGATACAGCCCAAAGTGGacacacacttacatgtgtgcacgcacggtaccacacatgtacacacagagacacacatacagccatgtatgtgcatacacacaaacgcacCTGGAGCTGGGAAGGGAAGGCCCTGGTGTCTGGCATGGAGAGAGGAAGGGGTGGGCTTTGGCCAGAGTGGCCTGGCAGCCGGCACCTCTCCAGTCCCCAGGCCTGGACCACCTCTACAAAGTTGGACAGAGGGAAAGGAGGAAGGGTCTAGCTTGGTCTCTACCTTGGCACAGCTGGGATTTGACAAATGCTCAGTTCTGCTCCTAGGGGTGGGCTGGAGCCCCCGCCAGGCAGGGCTGGACATGCCCTGAGTCATAGCATGGGTGGTTCTAGAGAGGGCAGGGGTGGGATGGAGCGTGCAGGCCTCTCAGTGCCCTACCAGGGCCCTGAGGCTTGCGTGGATGGCACTCACACCTACCCATGGCAGTCCACATGTGGCCCAGGCTGGGCTGGGGGACAGCCTGGGGTGGCACGCAGTAGCCTGTCCTGCTGGGTGAGCATGCTGCCAAGGGCAGCCCTTGCTGCCAGGCTGGGAGGAGGGGCAGGGGGCCTGCAGGTTGGGAGGCTGGGTGGGGCCTGGGCCCAGGCAGCTCTGTGGGAAGCCGCTGGATCTGAGCTGGGCTGGCTCAGGCCCTTACATGGCACTACTAGGGAGACTCTACTGGCCATGCAGGCCCTTACCTTGCGAGAGAACATCAATTTTGGCACCTTCCTCCCACAGGGAGCAATGGGGTGAGGGGAAGGGAACAGGACAGTTGAGAACATGGAGCTGACACATGCTTGAGTGGCAGAGCCAGAGGGCAGCACCAGGGACCAGGCCAGGCTGCAGAGGGGAGCACCAGgggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgCAGAGGGGAGAGCACCAGGGGCCGGGCCAGGCTGCAGAGGGGAGAGCACCAGgggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgCAGAGGGGAGAGCACCAGGGGCCGGGCCAGGCTGCAGAGGGGAGAGCACCAGGGGCCGGGCCAGGCTGCAGAGGGGAGAGCACCAGGGGCCGGGCCAGGCTGCAGAGGGCGGCACCAGGGGCCGGGCCAGGTTGCAGAGGGGAGCACCAGgggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagaggggagagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgCAGAGGGGAGAGCACCAGGGgccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagaggggagagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgCAGAGGGGAGAGCACCAGGGgccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagagggcagcaccaggggccgggccaggctgcagaggggagagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagaggggagcaccaggggccgggccaggctgcagaggggagcaccagcggccgggccaggctgcagaggggTCCACAGGCACCCACAACCCCAGCCCACGTAGTGAGGCTCAGAGGGCCTCTGGGCTCAGGCCGTGGACACCCTGCCTGGAGTGGCATCGGCCTCCTACAGTGGCTCGGCTTCCAGGGTGCAAAGTGGCGTCCCCACTCCTCAGGGCGTCTGGGAGGCCTGGAGGCACCAGTACCCAACCCGCCCTCCCTTGGCCTGCAGGACAGACATCACCCTGCCCCTCTCTTTCCCTCTCAGCAGCCCCTCCCCAGGCTCGAGGGTCCTCGGTCCAGGCCTTCATCTTCCCATTCTCATCTGTTTCTTTGCTCCCCGCAATGCCTGACTGTCCAAGGCATTTCTTGGGGTTGGGTATTCAAGAAGGTTTAAAGAAGAATTCCTTCCTGGCCCCGCACCCCACACAGCGCAGACATCCAAAAGCCTGGACAGGAACCTGGGGAGTGGTGTGGTCTGGCCTCCCTGACCTAGGCCCTCTTGAGGACCCCGGGGCAGGGAATTTGGGGGCAGGCTGGCGGGGCCTACCTTGGCCTTGGTGATGGTGCAGTGGAGGGCGTTGTTCTCCTGGTCATACAGCAGGCTGAAGTCCAGCGTGCCCAGGGCAGCTGCGGACAGAGGAGGGCACAGGTCCCACCCTGGCCGCATCTTGGAGAGGCTTCGCCTGCCCCTGTGAGACCAGATGAGCCTGGCCTGGGCAGGTGCCACCGTTCCATGGCGgctgccaccaaccaagcacctgctctatgccagcccctcacccatcctcccagtcccacccaaccctgggaagtcacaataatctccccactttccagaggaggagctgagacccagagaggtcaggtgggtcactccagttcCCTGTCTGGCCCAGTGTGTGGTCCACCTGAGCTGGGCACCCAGCCAAAGGAGTTCCTGCCCTCCTGGTGCAACTGCCAGTCTGGGccccgtgcctcagtttccctcacctgtgaaatgTCACAAGGATCACACAAGGCGGAGGAGACGAGGCTTTGAGAGGAACAGGTCCTGGCCAGGAAGATCAGCTGATTTGCTCAACAGTCCCCCAGCCAACACACAGGACTGCAGCTCCTCTGTCTCTCTGGCCCGTTGGGAACCCCGAGCAGGCCGTGAGGAGCCAGCTGGGTCCTCATACTGCTGCCCCCAAGTCTCTCAATGGCAGTGGTAACTCCCAAAAGCCGGGGGGAGGGTGCAGCCATAATTGGGGGAGGTTGCAGCCATAATTGGGTGCAGCTGCCTCCCTCCCCGGGGGCAATCACTCATAGCAGCTGTGGCTTTCCATGCAGAAGCGGCTCCCAGCATGGAGGCCAAGGTGATGGTTGGGGCAGAGCTTGGAGATGATGGTGGGGGGCAGAGCTTAGTGGCAGTCCGCAGACAGCAAGATGCACATTCACAGATGGCTTCAGAAGCCCAGAGCCTGCTCCCAGGCTGCAGGGCTGGTCAAATGGTGTCACattccttcattatttaccaagtgtttacaacatgccagactctaggggatggacatctgtgaggcagagtccctatcccaaggaaagcacagcttaaggggaggaacaaatggaaataatttgccgcagtagagtctggtgagccgggagccatggggagcccatgtgaattgggaccaccagggaaggcttcctggaggaggtgatgcttcacctgagccatgagggatgagtaggagttggccaatggcaaagaggggctgggggaggtgacgattccaaccacaggccaaccagcaggtacgggcttagtggctggtgtgatcccggGATAAGGGAAGGCCAAACTTGAAAGACTGTGTTCTTCAGGCTCTGAGCTAGGCTCCCAACCTGGATCTAGATCCAATTACGGCCACACCTGAccaggcccatcatttacatctcttctgcggctgctttcctacttcagaagcagaccctacggcctgcacagctgaaaatatttactatccagcccttctttatagaacaagtttgcagatctctgGGCGAGTGCTGGGCCCAGGCATAACAAAGTTTCTTGGGTCTGCAGGGAAATTGGCTAATGAAAGGCCACGAGTGTGGAGTGTCCTTTATTCTGAGCTGGTGATGGGAGTGGCATTTGTCTAGATCATCCCCTTCTAACACGGGTCAGGCACATGTCTCTGTCTTCGCAgcagtgtggggtgcaagacctgccctctgacatcagtctcctcggttcaaatcccagcgtgcacttccctgctgtgtggccagcttatctaacgtctttatgcctcagtttactcatctgtgaaatggagacaacgatagtatccacctcacagcacagtgtgcctgcggtgtaggaggtgctcagtagatcattatagaaggagcagatCCCTATGGTGGGTTTTCTAGCTAACAGTACTGGCTTCTCTAAGACCCATTCTGGAATGAAACTGTCCTCAATTGGCTCATTTCCCTCCCTGCCTTTGGGAACAGAACATACAAATAGTCACTTAAATTTTTTGAATAACCTGAGTCATTGCTATGGCCACGATGACTCTTCCGTATGTCACGGTCATGTTCCAGAGGCAAAAGGGGACACGAAGCAACCCCTGCACCAGAGGGTCCCGGGTGGCCCTCTTTTCGGTTTTCTTCATTTTTCTGCCTCTTCCTTTGACATCAGCCTGAAACTCCTATTAGATTACTTGTCTGTGTTGGCGCAGACAGCTCTCACCTATTCAATAATTGTTTCTCCCATGAGACCAGGTGATGATACTTGTTGGAAGTGTGTGCAAACTAAGAGCCAACAGGCCTTGAGTGGCTggtgtggggctcatgcctgtcatcccagcagtttgggaggccgaagcagatggatcacttgagctcaggagttcgagaccagcctggccaacatggcaacaaaaactacaaaaatggccgggtgtggtggctcacgcctataatcccagcactttgggaggctgagacgggtggatcacttgagcccaggatttcaagaccggcctggccaacatggaaacacccatttctacaaaaaatacaaaaatagccgggcatggtggcgcatgcctgtggtcccagctactcgggaggctgaggtgggaggaccgcttgagcccaggaagttgaggctacagtgagtgtgattgtgccactgcatccaacctaggcgacagagcgagatcctgtctcaaaaaaataaataaaTACTAGGTCTTGAGTGAAGCCTGAGGCACGTTCTCCTGAGCAGGGTATGTGTGCAGCAGCAATAGCTGTAGCTGGCCTTCAGCTGCCTGCATGCTAGCTGCCATCTCCACACCAGGCAGGTGAGGCTGCTGGAAGGGAGGTGTATCCCACTGCTCCTGCAGCAATGAACACAGCCACAGCCCGAGATGCACCAATCTGTACTTTTGTAACAATATTCTCTGAAGCTGTTGGCTTTACATGATAGTAAATGGATCTGCTGTTCAGGTCAGGCCTACCTGGGgtttgttcccccaacgaattagccccctgagcacagggaccatgccttggccacacctgtgagaccaacaaacagcagatgcagacacacaTGGCTGGTCCATTCAAACCAACTTGCCCTCCAGGTGCTCCCAGGAGCTGGGATCTGGTGTGACACCCAAGTGTAAAAATGCACATTCTGATTTCTGCCTGTTTCCCAACCCCAGAGAAGGACAGCCAGAAGACAGAGCGGCTGCCTGCTCCCCCTGGGACACCCAGCTCCTGGAGGGGAGAAGCCCCTGCACTGGCTCTACAGAAACCCCTGTCCAAGGAGGGCAGCGATCTTCTCCAACTGCCTGGGGGGAAAACTGACACCCGTCACCACCTCCTGGGAAGGAACAGCACACCCAGTGTGTCACCAGGAAGGGACTGATGAATTTCTTTGAACTCCACTGGAAGGTCTGTAAGAAATGATAGTTTATAAATAGAAGCTCGGTCACAGTTTTGAGAGGCTGGTGAAGCTCTcttacggagcacatggcccaaactcttcatttccctttagagaaatgaggctcagagaggggaggggagctgctcagcgtggcccagcagacccaggcacagaatctaggcctcttcacttccatcctgatagtttctcccttaaagcctactggctcCCAAGAAGttttttttttttgagacagggtcttgttctgttacccaggctggagtacagtggtgcgatcacagttcagcctcccaggctcaaacgatcctctcacctcagcctcccaagtagctgggactacaggcgcccaccaccatgcccggctaacttaaaaaatatatttacaaaaaaagagacagtggtgttggtgcggggggtgcgggggggtaggtctcactacgttgctcaggctggtcttgaactcccgggctcaagcaatcctcccgccttcacctcccaaagtgctgggattacaggcacacacgggccactctgcccTGGTCCAAAaagcttttttttggccactttgtttagctgaaatcttaggaggatgcaactaagaaacaaaaagagagaaaagcagagttgctctaggggaaggagagtgggggctcagCCGCTGCCCTGGAGAGCCCTCCGAGTTGGAAAAGCACCTTGGGCTTGTGCTGGGCGTGAGGAGGGCCCTAAGGGCAGGAGAAGGTGCTTCTCTGTGACCCACAGTGCCCTGCCCTTGGAGCGCTCCAATCACATGACCCCACTGAAGTTTACATCTTCGGAAAAATAATGAGGGTTGGTCACTAATGGGCCTTTGAGAGTTGTCACCCGCTTGTCGATAATTACAGAGCTTTAATGGGGCTAATTCAGAGAAAATCAAGCTCAAGCCCCTGTTGGCTCCTCTAAGGCCGAGCACCCCCTGCTTGGCTGATGTAGACCTGACAGCATCATCATAACCTTGGTTTGCACCTGGACTTTCCTGACACCCTTGGGAATAGGAAATCTGCCCTGAttttttttttttttttttttAAGAAAAATGTCAGGCTCTCAAATACTTTAAAAAAAACAAAACAATTGTGGCAAGGACCATAGAAAATCATCAAGCTTGTGTTGAAAGCATTTTATAGTTGGGGAAACAGCCATTCAGCCAGGCCGGCACtgcaggggggacagagagaaatgagccatcatccctgcttcccaggagcttagagacgagtgTTAAGATTTTTATAAACATCTtcattcatctgctttaaattgttgaaaggttctccatcaactcctggactcagttcaacctccttggtgtggcattcaagaccttgtgtgatatgaccactttggcctcctcataggtctcttccccttctttgagccacttggctctactgttttgcctacactgggctgagtttctgacacttcaggacctgtgtacacagctgtccctctgcgaagagcacctccctttcctcttttccctctgtcagcccacctcctccctcctcccagtcttagggctgagccctctctgtgagtgacctccgcctgccagagctttccatctgggttcttgcctccctgtccctcgctgtggggtcacctgtttgtcttggttttgatgaacaccctcaggtcagaggttgcctctcagcattcttgtatctctgtgcctaacacagagcctgccacatagttggtgcCGTAACATCGAGTGCATTACAAATATCACAGGTCCAGGTAGAACATGACTGGGGCCAAAGAGGTTCTGGGCTCAAAGGAAGGGGCACCACAGCCCATGGGGGCTGGAATATGCCCCCTAACACGGAGAACCTTTGTGGTGGACTCAACCCCTTGTCCTTGCTCCAGACCTGCACAGGCATCGGTCCTGACCCGGTCCTGGGTGAACCACACAGGGCAggtttaacacgggcatgtgatccaattctggccaatgagacaaaaggacaggtctcctggggacttctgggagaggtttcctcgctcttaaactgagacatgagaaaaggaatcggtccttaccaattccttacccagtcaaggaccttattgttacatgtgatgcctggacctgcagcagccaccttggacctgagggTCCTGCTTGGCTGATGGAGACTGACAGCGTCATTCTGACTTTGGTCTGCACCTGAACCTTCCTCACCCAGACACCCTTAGGAATAGGAAACGGGCCTTGATTTTGCTCTACAAGGAAAAACGCACAGCCACGAGCAAAGATGGACAGAACCATCACCCCAACAGTGCTGAGGGGCTGCTGCATTGGTGCACCCTGGTGCAGCAAGACCTCAGCTGtcctcattgtttaagccactctgagttggttatctgtcacttgcaacccatggcactctaGTGATTACagctttgaagaatggactgggcttccacaggtgggtgccgggcaggagagcctctcaggtaggggcagcgtgtgGGGTGTGTGGCCCACGGCCCACCTGGAGGGCAGCACGCCACAGAGGCCACCCTAGGAGAACTGGCAGGGCCGACACTGGGTCCTGCTCCTCATTGGGACATTCACTGCCTTTCCTCCCTGTCATCGCCCCTCTTCGAGATCTGCTGGGAACATGCGAGTTATGTGCTTGTGCTTTGACATGACTTAATAAAGAAAGGGCTTTAAAAAGCCAGGAGAAAAGGCTGAGTAAATAAACAGCACTTCACTCTCTGCCTTGGACAACAGAGCAACCGCCCTCATTTCTTTTTGTCAGAACAGAGATAATCCAATAATTAGGGCAGCAAATAGCATAACCCTGGCTAAAGCCGTAATGAACCATCTGGCTTGAATCATTGAGGCTTATTCTAAGGATCTGGTTGTCACAGCCCAGAGGGGGGCACCGCCTGGGGTACTGGGTGGAAGGCAGTCCCACCCCAGAACCTGTTCCCCCACCATGGACAAGAGCCAAGATTTCTGGGCTTCTGGGCCTAGGATGTTTGTCAGCCTCCCATAGGATCCAAGCCTAGGGTGGGGCCCCCCTCGAAGGGGGCGACAGGGGCCCAAGCTGGCCCTAGCTCAGTGGCACACCGTCCTCCTTAGCTCCTCTGAGCCCGTCTTCAGCATTTCTTTGGCATGTCCTGATAATACGGGTGTGGCCAGCCCACAGTTCAAGCTGGCAAAGCTGCATCCAGCTCTGCCCTGCACTGCTGACCTCATGTGAGTCTGGCCCCTGCACCGCATGTATCTCAGGCCAGGGGCCCAGCCCTGCTGCCAGGACATGCTCCCTGAAGGGCTGCTGAGCTGATGGATTGGAGATGGGGCTGGCTGGCCTGAGTTTCTAGCTTCCTGGTCCCCTCCTTGCTTTGTGGGTTCAGAGACAGCAAAAAAAAGAAAAAGAAAAAGCCTCTTGGCCATTCGCCATGCAGAGGGCCCTCTGTGGCACCAGGAGGGTGGCCTGAAGCTCTGCCCTCCCCTCTTACTTCCTTGGTGCAGCAGCAGAGATGCCAGAAATGGGAACAACTTTCCCATCCGTTCTTCTGGGGGAGGCTTTGGATTCAGGGCAGCCAAAGCAGTTACTTGGGTCCCACTCAGGGCCCACCCAGGGGATCACCAAGTCCAGGGTGAGCTTCGCACAGGGTGCCAGGGGCAGAGGGGAGGGGGGGTGGATCTAGGGCACAGCCCTGAGGGCAAAAAACTCTTCCGACCCAATCTCCCCAAGCTGGCAGGAAAGTGGAGGGACAAGACTGCTCCCCAGCCCCCACGCCCCAGGGCAGGGCCTTCATGTGCCAGGCGCTGGCCCGAGGGCCGTGGTCCAGGCCTCTAGAAAGTGCAAAGCAGGCAGTTTCCCCTACAGGGGCCCTGCTCTAACCGGCCACTGCTGATGGGCCTCCCCAGGTGGGGCGATGGGGGGTCTGTGCCCCGGGGGCACTGGTAATCCCTACCTTCAGCTTCTGGTGGCACATTTGATGCTTGGGAAACTCCAGGCCCGCAGCCCACAGGCCCTGGTGAGTGCCCAGGCCAGGCGCAGACATCCCTGCTGCGCAGGGGAGGGGCAGCACCAGCCCTGGAGAAGGGCCACATCCCGGGAAGGGCTGGGGTTTGACGAGACGCTGGTTTTCCAGGTCTCAGTGACAAGTCTGGAGCCACAGCTGAGCTAGGAGGGGGTTCTCACATGCCATCCCCACCCCGCGCAAACCGACTCCTCACTGGACTGCGACCTCTTCCGGCCTCGGTTTCCCAGCCAGTCCCGGCTCGGGCCGGACAGGCACCCTCGGGGACGGGAAAAGGCGCCAGGAGCGCCCACCGGCCGGGCCTCGGTCCCGGGACTCCGGCGCTTGCCTGCTCCCGGGGGCTCAGGGCTCAGTCCGGGAGGAGGGGGAGCGGGCTGGGGGGCCCTCTCTGCCCGGGGGCCGCGGGCGTAACAGGTGGGCGAAGGTGCGCGGCCCTGGCGAGGGCTGCGGCGGGGTCCATGGACACCGGAGGAGGAAACGCCAAGGTTTTTCCAAAGGACAAGCGGCCCCGCGGTCCTCCTGGTCCTCTGCTCGCGCGCCAGCAAAGCAGCTGCGCTCTGCGGGCCGCCGGGACCACGCGGGAGGCCGGGCCGCTCCCAGCCTCGGGCCCCTCCCCAGCTCGCCCCAGCCCCGACCCTCGGCCGCGAGGCCCTCCCGGAGCGGCTGGCGAGCGGGGAGCGACCGCGCGGCCGGCAGCAACTGGTGTCTCCCCGGGACGCAGCTCCGCCCTTCCCGGGAACAAAAGCAGCCGCCCGCGCCGGAGCTCCGGGAGGGCGGGCTGGCAGGGAGGGGGCGCGGCGCCGGCTCCGAGGAACCCGGCCCCGGAAATGGGACACCCCCAGGGGGTGCCCCCGAACTTCCCTCCTGTCCGGCCTGGGTCGGGGGAGGGCTCGAGGCCGGTGGGCAGGGCGCGGAGAGCGCACCGAGTGCGCCAGGGGCCCGCAAGCCCGCGGCGGGGTTGTGAACCGAGGCAGAGCGCGAGCGCGCGAGGGGGACCGGCGGAGGGAAGCCGCGAGGCCGTGGGGGGGCCGAGCCCGAGCCAGGGGAGGGGGCGCGAAGTCGGCGCGTGGGAAACTTACTGCAGTCGTCCGACTCGTAGCCGTCGGCGTCCGGCTCGTCCTCCGGCGGCTTGGCTGGCGGCCGCGCggggctgggacccgggctggggcccgggctggagccgTAGGCTCCGAAGAGCTGGTCCACATCCTCGTCGTCCTcgcgggcgccgtcggaggggctgcggcggccggcaccggccacagccgggcgcgcgggggcgtccgggggtgcagcggctcggggcccggcgtccgggggcaggccccgcgggAAGCGGGGGAAGTAGTCGGAGATCTGCTTGATGGGACGGATGGGGCCGGGGCACACGTCGATGGCCATATGCTCCTGGATGCTGATGGTCGCCTTCTCCCCGCGCCGCCGGAGGGTCATGCAGGCAGCGccgccccgccccgggcgcggcccggcccggcgcgaccccggcccgggggcggctcagcaggcccggcggggcgcggcGGGGGCTGCGGGCATCGCCGGCCGCGCCCCCGGACGGCCCTGACTTGGCCGCTGCCCGCTCCGCTGCGGACGGCGCGAGCGAGTGCCAGAGGCCGGCAGGCAGGGGGCGGGCCCAGCCCGCGTCACCCGGCAGCAACCAAGCAGGGTGAGTGTGCGGGCTGCGCGGGCGGCGCGGAGCGGAGGGAGCCGCGCGGCGCCACACTCACTCGCACTCGCACTCACACCGGCGTGCACGCCGGCCCGGGACCCCGCGCGCGCACACTCGCGGCCAGGCAGGGCCGCCGGGCGCCTTCCGCTCATGCACGCCCGCGGCACAAGCTGGGAGTCAACCCGGCAGGGACCCGCAGGACGCGCACCCACACGTTCCCGCGCGTGCCAGCCCACGCTGGGCTCCGCTCGCTCTCTCGGGACACACGCAGGTGTGCAAGTGCACACATGCGTGTGCAGAGACACGTGGTGGAAGCATCCGCTCGCTCACACCGTAGGTCACACACGCAAACGTCTGCATGCGCACATGGTTGTTTTAGGAAGCTGTGACACAGTACACCCCCAATGCACAGGCGCGCACACCTGATGGAGCACACACACAGGTGATCAAGGGCACCCAGGGCACAGGCTTCCCTACCCCCAAGCACCCCTAACAAGATGCACAAACATGAGCCCATAGAAGTGATCAGGGGACCCTGGGCACAACTCCTTCTCTCCTCCCCCTCTAGCAGGACGTGGAGTCACACTCCTAGCATGTAGGGACAGGTTGCCTACACACGGGCAGGACATGTACACGCTCAACTGCACAAGGACACCGGGGCTCAGTCTGTGCACACATATTCTCAGGTCACATACAACACAGCACTCTAACCTCGGGTCAGTCCCTCACTTGGTCAGTGAGTACAGTCTGAAGGACCCCCCGGGGCAGGGTGGCTGAGAACCTGCACAGGGCCTGGTCAGGAGGGAGTGAGGGCAGGGCCTGGGCGGTGAGTGATGCAATAAGGCTGGGCGGCACGCTCCCCCCACCCCCACTCCTACTCTAGGCCTCCCACACGGTCAGATCACTAAACAAATCCCAGAGGGCCCAGCCCTGGCTGTCCGGCTTTCCGGGACCAGAGCTCTGTTGGGAACTGCTGCTGCTTGGACAGGTGTGTTCCCGGAAAGCCCTGGGCATGGATGGAATCCTGTTTACCCTCTGGTTTCCACTGATGTGTAAGACACTTAGCTTCTTAGTGGGTGCCTTTGGTGCATCTGAATGAGGGGCTCCAAGCCTCTCTTGTTCCCCCACCATAACCCCTGCAGAGTGATGGGGAGCAGAGGAAAGAGAGGcaaagccttggcctgtggcttccagctgcacagttctggcaggctatttgacctctttgagcctcggtttcctcatctatgaagtgaggctatttccaactgcacagccttgtggcaaggccccatccagcacagacaggtaagaggtgctcagCTGGCTTTCCCTTCTCCCTTCCTTTGGAAAGACAAGACTCATGGTGAGAAGTGATGAGAAATTCATGTTTTGTGGTGAGTTCTGAACTGGGTGGTGGCGGGCACTACCGGCCTTTGAAAACACTGGAGAAACACATGGCATATGTTgtactgcgccaagaacattcacagctgtcatctcattgattccttaaaccaccccacgatgtaggcagggcctgctgttcccatttcacaggggaggaaattagcgctcaggcacaaggatgtgtccagggtgactgctggctggcggcagagctgcgatgagagcccagtgtcctgactgttcgatgcttccactttccctcccttctcccttTCCTCCCCTCCACTACAGAGCTCAGGGGCTCAGAGCAGAGTTGGAAACACAGGTAAAACCTCGTTCCCAAAGCTCATCCTGAGGCTTCTTGGACAGGGGAAGCCCAAACTgaggaggaggggaaggagggaaaaaaaggaggaggaagaggaggacgggaggaggCCAAGAGCCTCAGGGGTTACAGTGGGAATGAACCAGCCGGGGTTCCCTAAAGATAGTCTGAGGTCCTGGTGGGAGAAATATTCAGCCTTCCAAGAGCCAAAGGCCAAGGAAAGGACAGAAGAGCCTGGAAGGGCAGCCTGGCACAGAGGGGTTCTCATTTCCAGGAATGCTTAAGGGGATTAATCCTTAAATAGGCACAGGGTGACTTTTTCTGTGCCAGAGGTGGCTGCATCACAAAATGATTACATAATGACGCAGGTATTAAAATACAGACGCTGAACTGTCATTTTATCATTCCTTTACTAAGGAGCTGGAGGGtgtcgtcatcatcttacagatggggaaactgaggctgcgggaggtcaagtgactagcaagaggcagactggagatgagacctggacgtcctgactGCTGAGCCTGCCAAGGCCCTGTACCTGTGTTCACCGAGAGCTGGCCGCTCTCTCCTGGCCTCTTGTCCTATGGGTCTGGTTTTTGGAGGATCGGCTCATGGCTCTTGGCTCTGCAGGAGCTGGCTCTTGGGGAGGGCTTTGAAGAAGTCAGGTGGAGGGCCCAGCCTCCTAAGCATGGAGCCAGGGAACCCAAGGATGCCCACTGGAGAAACACATGGCATGTTGGGGTCTACCCTTTGGCAGGTGGCAGAGTTGAAATCCACTCTAGGTCTGAATTTGCCCAGCTCTGAGCCCAGCTGAAATGGGGTAGGGCCTCCCCGAGGGATAGAAGTGGATAGAAGTGGTGTTATCTGGGCAAGTGTCCACTTTCtagaaagagcataggtcagggagtaggcagacgtagagtcaaaagtcaggtgtgcctcctcctaagcgtaccggactttgggcaagttgtttaacttaggccctgggtctcatctgcaatgggggagaataagaacaacgttgtgtgagttaaaggtgaggatgggtatacagtgcttagcacagtgcccagtgtgcaactggcactcagggaattgtgattctgttGCCCCTGCCTTCCTGGTGCAAACCGTCCCATTGCAAATCTTCCTGGTGCAAACCGTCGTGGTGCAAACCTTCCTGGTGCAAATCGTCGTGGTGCAAACCTTCCTGGTGCAAACCGTCCCAGTGCAAAACTTCCTGGTGCAAATCGTCCTGGTGCAAACCTTCCTGATGCAAACTGTCATTGCGCTCCCAGTGCCTGCCTTGATTTCTCTACCAGTGACATGTGGTTGGCTGCTCCCTGTCTGCTGCCAGGACCAGGTGAGAAATGGATGCACTTGCCAAGGCTGGGCGCTGGCTGGCATGTGTGGGCATCTCTAAGCAGTTGGATATGTCCAAAGGCTCATCAATCATGTTGCCTTCCATTCCCCATGCTGAGGTGGGCAGCTGGGCAGCTGGACCAGCCTCTGGCAAAGTTAAGTGGATGGAGTCTGCCCTTGGTCACAGTCCTCAGAAGCCCTGTGCCCTTGCCTCTCTGCCTTCTGCTTTTCCCAACAAGCTTCTGAGCTTTCTCCCAACTCCCCACAGACCCTCTCAAAGGCTCCTCCTGTCAAGTGAGATAcagtgggttctttaaaaaatggtccaggtatctgtagcactagcatgtctatttactttgtcactgaggctggagtgcagtggtgtgatcatggctcactgcaacctcgacctcccgaggctcaagtgatcttcccaactcagcctctcaagtatctgggaccaaatgcatgcaccaccatccccagctaatttttaaaatttttgtagagacggggtcttgccatgttaccctggctagtctcaaactcctggctttaaatgattttcctgccctggcctctcaaaatgctgagattacaagcgtgagttgccacacccagcccaggatgtctatcaataatgcagattccaggcccccatctcacacccactgactcagaatatgtgtgtgcacactcaggatgcatcttaacaagcgcccctgatgattctggtgcacagtgaaggttgagactcgctgGGTTAGAGGGTGCTAATGGTTTAACGGTGATTTTCAACCTGCTGGGCTGCCTCTATACTGTTCACATGTGTAACATATGCCCATCAGTGACATCTCTCAATATTTAATGATTCTTCACTGGGGAAGTGAGTGACTAACCCAAGGCAGGTGTTGGAATTTCCAGAGAGGTTTGGCAAAGCCACAGTGGGGAGTCTGATCTCCTCCTGTTTGGGTATTCTGACCTCTTTCCCGGTGGAGAAGtgttgggagcaggccccccaaaatctagccataaactggccccaaaactggccataaacaaaacctctgcagcactgtgacatgttcataatggccctaacgcctccgctggaaggttgtgggtttaccggaatgagggcaaggaacacccggcccgcccaggacggaaaaccccttaaaggcgttcttaagccacaaacaataccgtgagtgatctgtgccttaagaacatgctcctgctgcagttaaccagcccaacctattcctttaattcagcccgtcccttcgtttcccataagggatacttttagttgatttaacatctatagaaacaatgccaatgactggcttgctgttagtaaatacgtgggtaaatctctgttccgggctctcagctctgaaggctgtgagacccctgatttcccactccacacctctatatttctgtgtgtgtgtctttaattcctctagcgccgctgggttagggtctccccgaccaagctggtctcggcaGAGAAGGACCAGTTAATGGCTCTTCTTAGCGTAGGTAACGTGTGTTGTTGAGGAAATGCCCTCTGTGCAGCACTTGGCTGGATGTTCCTTGGTTGGCATCTTGGCTGGTGTCCATGTGCCCGGAGAAGAAGGGCCCTCTCTGACCCAGGCCTAATATGTGTCCCCCGTTTCCACCCTTCCCTGATAACCGGAGGAGATCTTGTCCTGCTGTGCTAATAGAACGTTCTCTGTATTAACAGAAAATTTTAACCCAACAGAGGTGCTTGGATGGAGGAAATCCAAACAATGTTGCTGGTTGATGGAGAGGGGCATTCCAAGGTCTCTTGTCCATTCATTTATTCATGAAGCCACATttcaacaaatatttattgtgcacctgccctgtaacaggccccgagctgtgatctgtagggggctcagtgataaatgaagcCCATTTTTCTTTAGATTCCAGATGATGGAGGGGAAGTCAGGAAGGGAGGTAGGAACCACCACTCTAGGGGTCCCTTGCCTCCTTCTTGGCATACGCTATGGCCTCGGCATGGAGAGCATGATGGAGCACTGATGCCAGCAAGTCAGCTCAACATTTGCAATATAGCGTGTGGCTGCTTAGAGGCCAACTTGATATATTAAGTGATATTAAAAATGCAGCCTATTTCTGATAATTTATGCGCATTCTCATAAACACATTACACTCGTCACAGTACATTTACGAGCACCAGGCCTACCAGTGGGGAAAAGTTAACACCTACCTAATGATGTTGTTTTGCAGATGTCAGATGCTCTGGTGCTGCAGTGATGGGCTCAATGGTGCAAAGAAATGTGAGTCTTTCTTCTATCCTGGGACACCAGAAGGCTGCCCAGGGCTCGGCAGAAGAACCAGGCTCTCTCCCTCCACACTGCCAAGGTCTGTGCTGTGGGATTGGAAAGAGGCCGGGTGAGAGGCTCCCCTGCCAGGAGAGACTTAGCCCAGGATCAGGTTGCCTAAGGCAAGAGATTAGATGTCACAGAGGGATAATGCTGAATCCTCTGGGGAAGCTCTAGAAATGACCTCTGGGAGAAAGAGGGGCAGAGCCTGAGGCTTACATCACAGCACAGATCAGCCTCCCTCCCACAGGCCTGGGGACCACTAGAGCCCAGGGTCTTCATGACTGAAGTGAACATCTCAGCATTATACATGAATTAGAATGAGTAGAAACTCTAACTAGCCCAGGGCTGGTGGAAACCCAGGGCCAACAGCCACAAATGTTCCCTCCAATCGCTTTGAATTCATGATGCAATTCAAAGGAATTAACAATGCAAGTGGTAGCTTTCAATGATAACGTGTTCCACtttttgtgtttgagagagtctcactctgttgcccaggctgaagtgcaatggcacgatctcggttcactgccacctccacctcccgggttcaagcaattttcctgcctcagcctcctgagtagctgggactacaggtgactgccaccatgtttggcgggcgtagttctttgtatttttagtagagatggggtttcaccatgttggccacgctggtcttgaattcctgacctcaggtgatctgcccgcctcagtctcccaaagtgctgggattacaggtgtgaaccaccacacctggcACCCCCCGCCttttttttttttttttttttttttttagagactggaccttgctttgtcacccaggctggaatgccatggagtgatcatggctcactgcagtcttgaactcctgggctcacgtgatcctcccatctcagcctcctcaatagccagaaccacagatatgcaccaccacgtccagttaatctttttatgttttattttttaaagatgggtcttgctatgttgcccaggctTACATGAACTGAGttttacatttactacaaatgtgtgtacctataaacaatatatggtattgttttgcaagctttaaaactttgtaaaaatggcatcatactatacataacctcattaaactcatttttggcttattattatttgtgaagttttttcctgtagatgcatctagctatagtttgtttttattgctgtctactgttccactatgtgaatacgccataatttatttctcttttctcctattgttgatcatttaagctggttcctatgcgtggctatcatcaacatgctgcaatggctattcctgtttcctggtctgtatatgggatttctttttttgagacagggtctcactctattgcccaggctaaagttcagtcgtaccatctcggctcaccacagcctcaacctccctgggctcaggcgatcctcccacctcagcctcccgagtagctgggactgcaggcacataccaccacatctggctaatttttataatttttgtagagaaagggtttgccaggttgcccaggctagtctgaaactcctgagctcaagtgatcctcccacctcagcctcgcaaagtgctgggattacaggcgtgtgccaccacacctggccagggagagtttctttaagagagtttcttggataggaaatttgctgggcctagggtctgtatatctttaccttgactagagagtgcctagctgctccccaaactggttagaaccacggccattctcaccaaccatctctaacagtgccagttcctccttgtccttgtcaacacttggtattgccagacttgaatttttgctagtctggtaggttttttttatgtttaattagttctccttgctaactagtgCTACTTCATTTCTGCTGCTAAGGGTGGGCATGTGCTGTCAATAGATAAATGCAACAGATTAAAAATTGAAGAGCTtccatcaataagggattggctaaatacagtatgcctcacctgtacaatagaatactgcacaatcattaacaaagatgAGTGTGCTGATATGGAAGAGATATTGATATTCTGATGTACTAAATATCTTTTCATCTCCCAGATTTATTGTTACAAAGCAAGAGGCATAAAAAGCATATTCCCTTTGTAAATAAATGAAAAGATATGTATACACATGCATATTTGTATGTATATGCGCAGAATACCTCTGAAAGAATGAACAGGAAACTGGTAACCACAGTTCATCTGGGAAGAGCACTAGAGGACAGGGAAACTTTTTTGCTCTGTGAATTCTTACCACGCATGTGTATTAGCCTGTTGGAAAAAATTAGCcctagaataggcaaattcgtagagactgaaagtagaatagaggttgccagaggttttggggtagagaatagggggtttttatttgatagatgcattttctgtttgagatgatgagagagttctgaaatggatagtggtgatggttgtacaacattgtgattgtacttaatgccactcaactgtacacttaaaagcggttgaaatgggctgggcacggtggctcacacctggaatcccagcgcttcgggaagccaaggtgggcagatcacctgaggtcaggagttcacgaccagcctgaccaacatggtgaaaccccgtctctactaaaaatacaaaaattagctgggcgtggtggtggtcgcctataatcccagctactcaggaggctgaggcaggagaattgcttgaacctgggaggtggaggttgcagtgagccaagatcacgccactgtactccagcctgggcaacagaagtgagacctcatctcaaaaaaaaaaaaTGTTGAAATggcctggcacaatggttcacacctgtaatcccagccctcagggatgccaaggcaagaggatcacttgagcccaggagtttgagaccagcctgggaaagatggtgagactctgtctctacaaaatgttttttaaaaattagctgggtgcagtggtgcacaccctgtggtcccag diff --git a/tests/data/genes.fasta b/tests/data/genes.fasta index 5b0261e..5814cb8 100644 --- a/tests/data/genes.fasta +++ b/tests/data/genes.fasta @@ -1,4 +1,4 @@ ->gi|563317589|dbj|AB821309.1| Homo sapiens FGFR2-AHCYL1 mRNA for FGFR2-AHCYL1 fusion kinase protein, complete cds +>AB821309.1 Homo sapiens FGFR2-AHCYL1 mRNA for FGFR2-AHCYL1 fusion kinase protein, complete cds ATGGTCAGCTGGGGTCGTTTCATCTGCCTGGTCGTGGTCACCATGGCAACCTTGTCCCTGGCCCGGCCCT CCTTCAGTTTAGTTGAGGATACCACATTAGAGCCAGAAGATGCCATCTCATCCGGAGATGATGAGGATGA CACCGATGGTGCGGAAGATTTTGTCAGTGAGAACAGTAACAACAAGAGAGCACCATACTGGACCAACACA @@ -50,7 +50,7 @@ AGCCACAACACAGGCTTTGGCACTGATAGAACTCTATAATGCACCCGAGGGGCGATACAAGCAGGATGTG TACTTGCTTCCTAAGAAAATGGATGAATACGTTGCCAGCTTGCATCTGCCATCATTTGATGCCCACCTTA CAGAGCTGACAGATGACCAAGCAAAATATCTGGGACTCAACAAAAATGGGCCATTCAAACCTAATTATTA CAGATACTAA ->gi|557361099|gb|KF435150.1| Homo sapiens MDM4 protein variant Y (MDM4) mRNA, complete cds, alternatively spliced +>KF435150.1 Homo sapiens MDM4 protein variant Y (MDM4) mRNA, complete cds, alternatively spliced ATGACATCATTTTCCACCTCTGCTCAGTGTTCAACATCTGACAGTGCTTGCAGGATCTCTCCTGGACAAA TCAATCAGGTACGACCAAAACTGCCGCTTTTGAAGATTTTGCATGCAGCAGGTGCGCAAGGTGAAATGTT CACTGTTAAAGAGGTCATGCACTATTTAGGTCAGTACATAATGGTGAAGCAACTTTATGATCAGCAGGAG @@ -58,7 +58,7 @@ CAGCATATGGTATATTGTGGTGGAGATCTTTTGGGAGAACTACTGGGACGTCAGAGCTTCTCCGTGAAAG ACCCAAGCCCTCTCTATGATATGCTAAGAAAGAATCTTGTCACTTTAGCCACTGCTACTACAGCAAAGTG CAGAGGAAAGTTCCACTTCCAGAAAAAGAACTACAGAAGACGATATCCCCACACTGCCTACCTCAGAGCA TAAATGCATACATTCTAGAGAAGGTGATTGAAGTGGGAAAAAATGATGACCTGGAGGACTC ->gi|557361097|gb|KF435149.1| Homo sapiens MDM4 protein variant G (MDM4) mRNA, complete cds +>KF435149.1 Homo sapiens MDM4 protein variant G (MDM4) mRNA, complete cds ATGACATCATTTTCCACCTCTGCTCAGTGTTCAACATCTGACAGTGCTTGCAGGATCTCTCCTGGACAAA TCAATCAGGTACGACCAAAACTGCCGCTTTTGAAGATTTTGCATGCAGCAGGTGCGCAAGGTGAAATGTT CACTGTTAAAGAGGTCATGCACTATTTAGGTCAGTACATAATGGTGAAGCAACTTTATGATCAGCAGGAG @@ -69,7 +69,7 @@ TAAATGCATACATTCTAGAGAAGATGAAGACTTAATTGAAAATTTAGCCCAAGATGAAACATCTAGGCTG GACCTTGGATTTGAGGAGTGGGATGTAGCTGGCCTGCCTTGGTGGTTTTTAGGAAACTTGAGAAGCAACT ATACACCTAGAAGTAATGGCTCAACTGATTTACAGACAAATCAGGTGATTGAAGTGGGAAAAAATGATGA CCTGGAGGACTC ->gi|543583796|ref|NR_104216.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 8, non-coding RNA +>NR_104216.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 8, non-coding RNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -136,7 +136,7 @@ TGGGCCTCATTACAGTCACAATTGTCTATTCTGTTTCCTACCCTGAACACATTAAAATGGTAGGAACTAA TGCTTGTCTTATTTAATTACTAAAAGCCACCATTTTCTTTGATAGATTGAGCTACAGATTGTAAACTTCA TGTATTTCTTTATAAGTCAACCCTTTTCAAAGATACGCACATCAAACTGAATGAATAAATAAATATTGAG AAGTTGAAAAAAAAAAAAAAAAA ->gi|543583795|ref|NR_104215.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 7, non-coding RNA +>NR_104215.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 7, non-coding RNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -213,7 +213,7 @@ ATGGGTCAGTGTGGAAAATGCATTAATCATATTCTAAACGTTCATGGGCCTCATTACAGTCACAATTGTC TATTCTGTTTCCTACCCTGAACACATTAAAATGGTAGGAACTAATGCTTGTCTTATTTAATTACTAAAAG CCACCATTTTCTTTGATAGATTGAGCTACAGATTGTAAACTTCATGTATTTCTTTATAAGTCAACCCTTT TCAAAGATACGCACATCAAACTGAATGAATAAATAAATATTGAGAAGTTGAAAAAAAAAAAAAAAAA ->gi|543583794|ref|NR_104212.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 6, non-coding RNA +>NR_104212.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 6, non-coding RNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -291,7 +291,7 @@ GAAAATGCATTAATCATATTCTAAACGTTCATGGGCCTCATTACAGTCACAATTGTCTATTCTGTTTCCT ACCCTGAACACATTAAAATGGTAGGAACTAATGCTTGTCTTATTTAATTACTAAAAGCCACCATTTTCTT TGATAGATTGAGCTACAGATTGTAAACTTCATGTATTTCTTTATAAGTCAACCCTTTTCAAAGATACGCA CATCAAACTGAATGAATAAATAAATATTGAGAAGTTGAAAAAAAAAAAAAAAAA ->gi|543583788|ref|NM_001282545.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 3, mRNA +>NM_001282545.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 3, mRNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -352,7 +352,7 @@ CATATTCTAAACGTTCATGGGCCTCATTACAGTCACAATTGTCTATTCTGTTTCCTACCCTGAACACATT AAAATGGTAGGAACTAATGCTTGTCTTATTTAATTACTAAAAGCCACCATTTTCTTTGATAGATTGAGCT ACAGATTGTAAACTTCATGTATTTCTTTATAAGTCAACCCTTTTCAAAGATACGCACATCAAACTGAATG AATAAATAAATATTGAGAAGTTGAAAAAAAAAAAAAAAAA ->gi|543583786|ref|NM_001282543.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 2, mRNA +>NM_001282543.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 2, mRNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -432,7 +432,7 @@ ACAATTGTCTATTCTGTTTCCTACCCTGAACACATTAAAATGGTAGGAACTAATGCTTGTCTTATTTAAT TACTAAAAGCCACCATTTTCTTTGATAGATTGAGCTACAGATTGTAAACTTCATGTATTTCTTTATAAGT CAACCCTTTTCAAAGATACGCACATCAAACTGAATGAATAAATAAATATTGAGAAGTTGAAAAAAAAAAA AAAAAA ->gi|543583785|ref|NM_000465.3| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 1, mRNA +>NM_000465.3 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 1, mRNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -512,7 +512,7 @@ GTCAGTGTGGAAAATGCATTAATCATATTCTAAACGTTCATGGGCCTCATTACAGTCACAATTGTCTATT CTGTTTCCTACCCTGAACACATTAAAATGGTAGGAACTAATGCTTGTCTTATTTAATTACTAAAAGCCAC CATTTTCTTTGATAGATTGAGCTACAGATTGTAAACTTCATGTATTTCTTTATAAGTCAACCCTTTTCAA AGATACGCACATCAAACTGAATGAATAAATAAATATTGAGAAGTTGAAAAAAAAAAAAAAAAA ->gi|543583740|ref|NM_001282549.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 5, mRNA +>NM_001282549.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 5, mRNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -570,7 +570,7 @@ GGTCAGTGTGGAAAATGCATTAATCATATTCTAAACGTTCATGGGCCTCATTACAGTCACAATTGTCTAT TCTGTTTCCTACCCTGAACACATTAAAATGGTAGGAACTAATGCTTGTCTTATTTAATTACTAAAAGCCA CCATTTTCTTTGATAGATTGAGCTACAGATTGTAAACTTCATGTATTTCTTTATAAGTCAACCCTTTTCA AAGATACGCACATCAAACTGAATGAATAAATAAATATTGAGAAGTTGAAAAAAAAAAAAAAAAA ->gi|543583738|ref|NM_001282548.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 4, mRNA +>NM_001282548.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 4, mRNA CCCCGCCCCTCTGGCGGCCCGCCGTCCCAGACGCGGGAAGAGCTTGGCCGGTTTCGAGTCGCTGGCCTGC AGCTTCCCTGTGGTTTCCCGAGGCTTCCTTGCTTCCCGCTCTGCGAGGAGCCTTTCATCCGAAGGCGGGA CGATGCCGGATAATCGGCAGCCGAGGAACCGGCAGCCGAGGATCCGCTCCGGGAACGAGCCTCGTTCCGC @@ -630,7 +630,7 @@ AAAATGCATTAATCATATTCTAAACGTTCATGGGCCTCATTACAGTCACAATTGTCTATTCTGTTTCCTA CCCTGAACACATTAAAATGGTAGGAACTAATGCTTGTCTTATTTAATTACTAAAAGCCACCATTTTCTTT GATAGATTGAGCTACAGATTGTAAACTTCATGTATTTCTTTATAAGTCAACCCTTTTCAAAGATACGCAC ATCAAACTGAATGAATAAATAAATATTGAGAAGTTGAAAAAAAAAAAAAAAAA ->gi|530384540|ref|XM_005249645.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X4, mRNA +>XM_005249645.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X4, mRNA GTGCTGGGACTACAGAGTCCAGTGTCGTGCTGCTGCAGGAGCACCCCTGCCTGGTGGAGCTGCTGTCCCA TGTGCTGAAAGTCCAGGACCTGAGTTCTGGGGTCCTCTCCTTCTCACTGCGCCTGGCAGGAACCTTCGCA GCCCAGGAAAACTGCTTCCAGTATCTTCAGCAGGGGGAGTTACTACCAGGGCTCTTTGGGGAGCCAGGAC @@ -671,7 +671,7 @@ CCGACTGCTACTGAGCAGAACCAGAGTCTGCCACTGGGGCTCAGGACCAAGGGAGGCAGCACCATGTCCT TCTGTGGGACACTGCCAGCCCCAGGGCTCCAGCCCAGCCCGGTGGATCCTCTGGGGAAGCCAGGACCAGG AGAGAAGCAAGGTCAAGAAATCCCACAGTTTGATGTATTAAAGAAATGACTTATTTCTACTCAAAATAAA TGGCATTGAAGTCTTTCTTTAA ->gi|530384538|ref|XM_005249644.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X3, mRNA +>XM_005249644.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X3, mRNA ACCGGATGCTCGGCATGAACCACTAGGCGCCTGGCGGGGGTGATCTGTCGGAGCGACCGGCTTGGCGCCT GCCTGTCCCCAGCCCCTCTCAGCTTGAACTCCTTCCTTCAAGTCTGGGCCCTCGAGGCTTCCAGAGCGGC CTCCAGGGGTGCAGTCTCAGTTCCCCACGCCAGCCGTCTCCGTCCTCCGCCTCCTCCGGGCCTGGCAGGT @@ -715,7 +715,7 @@ AGGACATGCTGGCCACGGGAGGCTTCCTGCAGGGGGACGAGGCCGACTGCTACTGAGCAGAACCAGAGTC TGCCACTGGGGCTCAGGACCAAGGGAGGCAGCACCATGTCCTTCTGTGGGACACTGCCAGCCCCAGGGCT CCAGCCCAGCCCGGTGGATCCTCTGGGGAAGCCAGGACCAGGAGAGAAGCAAGGTCAAGAAATCCCACAG TTTGATGTATTAAAGAAATGACTTATTTCTACTCAAAATAAATGGCATTGAAGTCTTTCTTTAA ->gi|530384536|ref|XM_005249643.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X2, mRNA +>XM_005249643.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X2, mRNA TCTGTCGGAGCGACCGGCTTGGCGCCTGCCTGTCCCCAGCCCCTCTCAGCTTGAACTCCTTCCTTCAAGT CTGGGCCCTCGAGGCTTCCAGAGCGGCCTCCAGGGGTGCAGTCTCAGTTCCCCACGCCAGCCGTCTCCGT CCTCCGCCTCCTCCGGGCCTGGCAGGTGGCACTGTCCGGAGGCGGAGCCTTGGGCGAGGGGTGGTTGCGG @@ -761,7 +761,7 @@ GACGAGGCCGACTGCTACTGAGCAGAACCAGAGTCTGCCACTGGGGCTCAGGACCAAGGGAGGCAGCACC ATGTCCTTCTGTGGGACACTGCCAGCCCCAGGGCTCCAGCCCAGCCCGGTGGATCCTCTGGGGAAGCCAG GACCAGGAGAGAAGCAAGGTCAAGAAATCCCACAGTTTGATGTATTAAAGAAATGACTTATTTCTACTCA AAATAAATGGCATTGAAGTCTTTCTTTAA ->gi|530384534|ref|XM_005249642.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X1, mRNA +>XM_005249642.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X1, mRNA GCGACCGGCTTGGCGCCTGCCTGTCCCCAGCCCCTCTCAGCTTGAACTCCTTCCTTCAAGTCTGGGCCCT CGAGGCTTCCAGAGCGGCCTCCAGGGGTGCAGTCTCAGTTCCCCACGCCAGCCGTCTCCGTCCTCCGCCT CCTCCGGGCCTGGCAGGTGGCACTGTCCGGAGGCGGAGCCTTGGGCGAGGGGTGGTTGCGGCGGAGGACG @@ -807,7 +807,7 @@ TGCTACTGAGCAGAACCAGAGTCTGCCACTGGGGCTCAGGACCAAGGGAGGCAGCACCATGTCCTTCTGT GGGACACTGCCAGCCCCAGGGCTCCAGCCCAGCCCGGTGGATCCTCTGGGGAAGCCAGGACCAGGAGAGA AGCAAGGTCAAGAAATCCCACAGTTTGATGTATTAAAGAAATGACTTATTTCTACTCAAAATAAATGGCA TTGAAGTCTTTCTTTAA ->gi|530373237|ref|XM_005265508.1| PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X2, mRNA +>XM_005265508.1 PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X2, mRNA GCATGCCCGCATCTGCTGTCCGACAGGCGGAAGACGAGCCCAGAGGCGGAGCAGGGCCGTCGCGCCTTGG TGACGTCTGCCGCCGGCGCGGGCGGGTGACGCGACTGGGCCCGTTGTCTGTGTGTGGGACTGAGGGGCCC CGGGGGCGGTGGGGGCTCCCGGTGGGGGCAGCGGTGGGGAGGGAGGGCCTGGACATGGCGCTGAGGGGCC @@ -848,7 +848,7 @@ GAATAGTCCCAGCTGGAGAGTCCAGGCCCTGGGAATGGGAGGAACCAGGCCACATTCCTTCCATCGTGCC CTGAGGCCTGACACGGCAGATCAGCCCCATAGTGCTCAGGAGGCAGCATCTGGAGTTGGGGCACAGCGAG GTACTGCAGCTTCCTCCACAGCCGGCTGTGGAGCAGCAGGACCTGGCCCTTCTGCCTGGGCAGCAGAATA TATATTTTACCTATCAGAGACATCTATTTTTCTGGGCTCCAACCCAACATGCCACCATGTTGAC ->gi|530373235|ref|XM_005265507.1| PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X1, mRNA +>XM_005265507.1 PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X1, mRNA GCATGCCCGCATCTGCTGTCCGACAGGCGGAAGACGAGCCCAGAGGCGGAGCAGGGCCGTCGCGCCTTGG TGACGTCTGCCGCCGGCGCGGGCGGGTGACGCGACTGGGCCCGTTGTCTGTGTGTGGGACTGAGGGGCCC CGGGGGCGGTGGGGGCTCCCGGTGGGGGCAGCGGTGGGGAGGGAGGGCCTGGACATGGCGCTGAGGGGCC @@ -890,7 +890,7 @@ AGAGTCCAGGCCCTGGGAATGGGAGGAACCAGGCCACATTCCTTCCATCGTGCCCTGAGGCCTGACACGG CAGATCAGCCCCATAGTGCTCAGGAGGCAGCATCTGGAGTTGGGGCACAGCGAGGTACTGCAGCTTCCTC CACAGCCGGCTGTGGAGCAGCAGGACCTGGCCCTTCTGCCTGGGCAGCAGAATATATATTTTACCTATCA GAGACATCTATTTTTCTGGGCTCCAACCCAACATGCCACCATGTTGAC ->gi|530364726|ref|XR_241081.1| PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X4, misc_RNA +>XR_241081.1 PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X4, misc_RNA GTGTGGGAGGCCGGAAGTTGCGGCTTCATTACTCGCCATTTCAAAATGCTGCCGAGGCCCTAGGATCTGT GACTGCCACCCCTCCCCCCACCCGGGCTCGGCGGGGGAGCGACTCATGGAGCTGCCGTAAGTTTTACCAA CAGACTGCAGTTTCTTCACTACCAAAATGACATCATTTTCCACCTCTGCTCAGTGTTCAACATCTGACAG @@ -906,7 +906,7 @@ GTGATTGAAGTGGGAAAAAATGATGACCTGGAGGACTCTAAGTCCTTAAGTGATGATACCGATGTAGAGG TTACCTCTGAGGATGAGTGGCAGTGTACTGAATGCAAGAAATTTAACTCTCCAAGCAAGAGGTACTGTTT TCGTTGTTGGGCCTTGAGGAAGGATTGGTATTCAGATTGTTCAAAGTTAACCCATTCTCTCTCCACGTCT GATATCACTGCCATACCTGAAAAGGAAAA ->gi|530364725|ref|XR_241080.1| PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X3, misc_RNA +>XR_241080.1 PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X3, misc_RNA GTGTGGGAGGCCGGAAGTTGCGGCTTCATTACTCGCCATTTCAAAATGCTGCCGAGGCCCTAGGATCTGT GACTGCCACCCCTCCCCCCACCCGGGCTCGGCGGGGGAGCGACTCATGGAGCTGCCGTAAGTTTTACCAA CAGACTGCAGTTTCTTCACTACCAAAATGACATCATTTTCCACCTCTGCTCAGTGTTCAACATCTGACAG @@ -977,7 +977,7 @@ AAAGGGGCCAGACTGTGTTGCCTTCTTGAGCCTGGTCTGACTCCTGAGTGGAAGTCTGATTCCAGGTACA TGAGATAAGCACTAATACCTCCAGTTTGCAGATTAAGAGACTGAGGTCCTGAAGAGGTTAAAGAACTTGG CTCAAGTCACATAGCTGGTGAGCAGCAAGATACAAGAATCAACCCAAGTCCAGGGGGCTGTGTGCCGTTT ACACTTCACATCTGTGCTGCCAGGGCTGTAGCTATAAAAGCTTGAAAACCATTA ->gi|530364724|ref|XR_241079.1| PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X2, misc_RNA +>XR_241079.1 PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X2, misc_RNA AAGTTGCGGCTTCATTACTCGCCATTTCAAAATGCTGCCGAGGCCCTAGGATCTGTGACTGCCACCCCTC CCCCCACCCGGGCTCGGCGGGGGAGCGACTCATGGAGCTGCCGTAAGTTTTACCAACAGACTGCAGTTTC TTCACTACCAAAATGACATCATTTTCCACCTCTGCTCAGTGTTCAACATCTGACAGTGCTTGCAGGATCT diff --git a/tests/data/genes.fasta.lower b/tests/data/genes.fasta.lower index d88bdc6..7df934b 100644 --- a/tests/data/genes.fasta.lower +++ b/tests/data/genes.fasta.lower @@ -1,4 +1,4 @@ ->gi|563317589|dbj|AB821309.1| Homo sapiens FGFR2-AHCYL1 mRNA for FGFR2-AHCYL1 fusion kinase protein, complete cds +>AB821309.1 Homo sapiens FGFR2-AHCYL1 mRNA for FGFR2-AHCYL1 fusion kinase protein, complete cds atggtcagctggggtcgtttcatctgcctggtcgtggtcaccatggcaaccttgtccctggcccggccct ccttcagtttagttgaggataccacattagagccagaagatgccatctcatccggagatgatgaggatga caccgatggtgcggaagattttgtcagtgagaacagtaacaacaagagagcaccatactggaccaacaca @@ -50,7 +50,7 @@ agccacaacacaggctttggcactgatagaactctataatgcacccgaggggcgatacaagcaggatgtg tacttgcttcctaagaaaatggatgaatacgttgccagcttgcatctgccatcatttgatgcccacctta cagagctgacagatgaccaagcaaaatatctgggactcaacaaaaatgggccattcaaacctaattatta cagatactaa ->gi|557361099|gb|KF435150.1| Homo sapiens MDM4 protein variant Y (MDM4) mRNA, complete cds, alternatively spliced +>KF435150.1 Homo sapiens MDM4 protein variant Y (MDM4) mRNA, complete cds, alternatively spliced atgacatcattttccacctctgctcagtgttcaacatctgacagtgcttgcaggatctctcctggacaaa tcaatcaggtacgaccaaaactgccgcttttgaagattttgcatgcagcaggtgcgcaaggtgaaatgtt cactgttaaagaggtcatgcactatttaggtcagtacataatggtgaagcaactttatgatcagcaggag @@ -58,7 +58,7 @@ cagcatatggtatattgtggtggagatcttttgggagaactactgggacgtcagagcttctccgtgaaag acccaagccctctctatgatatgctaagaaagaatcttgtcactttagccactgctactacagcaaagtg cagaggaaagttccacttccagaaaaagaactacagaagacgatatccccacactgcctacctcagagca taaatgcatacattctagagaaggtgattgaagtgggaaaaaatgatgacctggaggactc ->gi|557361097|gb|KF435149.1| Homo sapiens MDM4 protein variant G (MDM4) mRNA, complete cds +>KF435149.1 Homo sapiens MDM4 protein variant G (MDM4) mRNA, complete cds atgacatcattttccacctctgctcagtgttcaacatctgacagtgcttgcaggatctctcctggacaaa tcaatcaggtacgaccaaaactgccgcttttgaagattttgcatgcagcaggtgcgcaaggtgaaatgtt cactgttaaagaggtcatgcactatttaggtcagtacataatggtgaagcaactttatgatcagcaggag @@ -69,7 +69,7 @@ taaatgcatacattctagagaagatgaagacttaattgaaaatttagcccaagatgaaacatctaggctg gaccttggatttgaggagtgggatgtagctggcctgccttggtggtttttaggaaacttgagaagcaact atacacctagaagtaatggctcaactgatttacagacaaatcaggtgattgaagtgggaaaaaatgatga cctggaggactc ->gi|543583796|ref|NR_104216.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 8, non-coding RNA +>NR_104216.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 8, non-coding RNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -136,7 +136,7 @@ tgggcctcattacagtcacaattgtctattctgtttcctaccctgaacacattaaaatggtaggaactaa tgcttgtcttatttaattactaaaagccaccattttctttgatagattgagctacagattgtaaacttca tgtatttctttataagtcaacccttttcaaagatacgcacatcaaactgaatgaataaataaatattgag aagttgaaaaaaaaaaaaaaaaa ->gi|543583795|ref|NR_104215.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 7, non-coding RNA +>NR_104215.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 7, non-coding RNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -213,7 +213,7 @@ atgggtcagtgtggaaaatgcattaatcatattctaaacgttcatgggcctcattacagtcacaattgtc tattctgtttcctaccctgaacacattaaaatggtaggaactaatgcttgtcttatttaattactaaaag ccaccattttctttgatagattgagctacagattgtaaacttcatgtatttctttataagtcaacccttt tcaaagatacgcacatcaaactgaatgaataaataaatattgagaagttgaaaaaaaaaaaaaaaaa ->gi|543583794|ref|NR_104212.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 6, non-coding RNA +>NR_104212.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 6, non-coding RNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -291,7 +291,7 @@ gaaaatgcattaatcatattctaaacgttcatgggcctcattacagtcacaattgtctattctgtttcct accctgaacacattaaaatggtaggaactaatgcttgtcttatttaattactaaaagccaccattttctt tgatagattgagctacagattgtaaacttcatgtatttctttataagtcaacccttttcaaagatacgca catcaaactgaatgaataaataaatattgagaagttgaaaaaaaaaaaaaaaaa ->gi|543583788|ref|NM_001282545.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 3, mRNA +>NM_001282545.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 3, mRNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -352,7 +352,7 @@ catattctaaacgttcatgggcctcattacagtcacaattgtctattctgtttcctaccctgaacacatt aaaatggtaggaactaatgcttgtcttatttaattactaaaagccaccattttctttgatagattgagct acagattgtaaacttcatgtatttctttataagtcaacccttttcaaagatacgcacatcaaactgaatg aataaataaatattgagaagttgaaaaaaaaaaaaaaaaa ->gi|543583786|ref|NM_001282543.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 2, mRNA +>NM_001282543.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 2, mRNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -432,7 +432,7 @@ acaattgtctattctgtttcctaccctgaacacattaaaatggtaggaactaatgcttgtcttatttaat tactaaaagccaccattttctttgatagattgagctacagattgtaaacttcatgtatttctttataagt caacccttttcaaagatacgcacatcaaactgaatgaataaataaatattgagaagttgaaaaaaaaaaa aaaaaa ->gi|543583785|ref|NM_000465.3| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 1, mRNA +>NM_000465.3 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 1, mRNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -512,7 +512,7 @@ gtcagtgtggaaaatgcattaatcatattctaaacgttcatgggcctcattacagtcacaattgtctatt ctgtttcctaccctgaacacattaaaatggtaggaactaatgcttgtcttatttaattactaaaagccac cattttctttgatagattgagctacagattgtaaacttcatgtatttctttataagtcaacccttttcaa agatacgcacatcaaactgaatgaataaataaatattgagaagttgaaaaaaaaaaaaaaaaa ->gi|543583740|ref|NM_001282549.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 5, mRNA +>NM_001282549.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 5, mRNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -570,7 +570,7 @@ ggtcagtgtggaaaatgcattaatcatattctaaacgttcatgggcctcattacagtcacaattgtctat tctgtttcctaccctgaacacattaaaatggtaggaactaatgcttgtcttatttaattactaaaagcca ccattttctttgatagattgagctacagattgtaaacttcatgtatttctttataagtcaacccttttca aagatacgcacatcaaactgaatgaataaataaatattgagaagttgaaaaaaaaaaaaaaaaa ->gi|543583738|ref|NM_001282548.1| Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 4, mRNA +>NM_001282548.1 Homo sapiens BRCA1 associated RING domain 1 (BARD1), transcript variant 4, mRNA ccccgcccctctggcggcccgccgtcccagacgcgggaagagcttggccggtttcgagtcgctggcctgc agcttccctgtggtttcccgaggcttccttgcttcccgctctgcgaggagcctttcatccgaaggcggga cgatgccggataatcggcagccgaggaaccggcagccgaggatccgctccgggaacgagcctcgttccgc @@ -630,7 +630,7 @@ aaaatgcattaatcatattctaaacgttcatgggcctcattacagtcacaattgtctattctgtttccta ccctgaacacattaaaatggtaggaactaatgcttgtcttatttaattactaaaagccaccattttcttt gatagattgagctacagattgtaaacttcatgtatttctttataagtcaacccttttcaaagatacgcac atcaaactgaatgaataaataaatattgagaagttgaaaaaaaaaaaaaaaaa ->gi|530384540|ref|XM_005249645.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X4, mRNA +>XM_005249645.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X4, mRNA gtgctgggactacagagtccagtgtcgtgctgctgcaggagcacccctgcctggtggagctgctgtccca tgtgctgaaagtccaggacctgagttctggggtcctctccttctcactgcgcctggcaggaaccttcgca gcccaggaaaactgcttccagtatcttcagcagggggagttactaccagggctctttggggagccaggac @@ -671,7 +671,7 @@ ccgactgctactgagcagaaccagagtctgccactggggctcaggaccaagggaggcagcaccatgtcct tctgtgggacactgccagccccagggctccagcccagcccggtggatcctctggggaagccaggaccagg agagaagcaaggtcaagaaatcccacagtttgatgtattaaagaaatgacttatttctactcaaaataaa tggcattgaagtctttctttaa ->gi|530384538|ref|XM_005249644.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X3, mRNA +>XM_005249644.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X3, mRNA accggatgctcggcatgaaccactaggcgcctggcgggggtgatctgtcggagcgaccggcttggcgcct gcctgtccccagcccctctcagcttgaactccttccttcaagtctgggccctcgaggcttccagagcggc ctccaggggtgcagtctcagttccccacgccagccgtctccgtcctccgcctcctccgggcctggcaggt @@ -715,7 +715,7 @@ aggacatgctggccacgggaggcttcctgcagggggacgaggccgactgctactgagcagaaccagagtc tgccactggggctcaggaccaagggaggcagcaccatgtccttctgtgggacactgccagccccagggct ccagcccagcccggtggatcctctggggaagccaggaccaggagagaagcaaggtcaagaaatcccacag tttgatgtattaaagaaatgacttatttctactcaaaataaatggcattgaagtctttctttaa ->gi|530384536|ref|XM_005249643.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X2, mRNA +>XM_005249643.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X2, mRNA tctgtcggagcgaccggcttggcgcctgcctgtccccagcccctctcagcttgaactccttccttcaagt ctgggccctcgaggcttccagagcggcctccaggggtgcagtctcagttccccacgccagccgtctccgt cctccgcctcctccgggcctggcaggtggcactgtccggaggcggagccttgggcgaggggtggttgcgg @@ -761,7 +761,7 @@ gacgaggccgactgctactgagcagaaccagagtctgccactggggctcaggaccaagggaggcagcacc atgtccttctgtgggacactgccagccccagggctccagcccagcccggtggatcctctggggaagccag gaccaggagagaagcaaggtcaagaaatcccacagtttgatgtattaaagaaatgacttatttctactca aaataaatggcattgaagtctttctttaa ->gi|530384534|ref|XM_005249642.1| PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X1, mRNA +>XM_005249642.1 PREDICTED: Homo sapiens BRCA1-associated ATM activator 1 (BRAT1), transcript variant X1, mRNA gcgaccggcttggcgcctgcctgtccccagcccctctcagcttgaactccttccttcaagtctgggccct cgaggcttccagagcggcctccaggggtgcagtctcagttccccacgccagccgtctccgtcctccgcct cctccgggcctggcaggtggcactgtccggaggcggagccttgggcgaggggtggttgcggcggaggacg @@ -807,7 +807,7 @@ tgctactgagcagaaccagagtctgccactggggctcaggaccaagggaggcagcaccatgtccttctgt gggacactgccagccccagggctccagcccagcccggtggatcctctggggaagccaggaccaggagaga agcaaggtcaagaaatcccacagtttgatgtattaaagaaatgacttatttctactcaaaataaatggca ttgaagtctttctttaa ->gi|530373237|ref|XM_005265508.1| PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X2, mRNA +>XM_005265508.1 PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X2, mRNA gcatgcccgcatctgctgtccgacaggcggaagacgagcccagaggcggagcagggccgtcgcgccttgg tgacgtctgccgccggcgcgggcgggtgacgcgactgggcccgttgtctgtgtgtgggactgaggggccc cgggggcggtgggggctcccggtgggggcagcggtggggagggagggcctggacatggcgctgaggggcc @@ -848,7 +848,7 @@ gaatagtcccagctggagagtccaggccctgggaatgggaggaaccaggccacattccttccatcgtgcc ctgaggcctgacacggcagatcagccccatagtgctcaggaggcagcatctggagttggggcacagcgag gtactgcagcttcctccacagccggctgtggagcagcaggacctggcccttctgcctgggcagcagaata tatattttacctatcagagacatctatttttctgggctccaacccaacatgccaccatgttgac ->gi|530373235|ref|XM_005265507.1| PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X1, mRNA +>XM_005265507.1 PREDICTED: Homo sapiens BRCA1 associated protein-1 (ubiquitin carboxy-terminal hydrolase) (BAP1), transcript variant X1, mRNA gcatgcccgcatctgctgtccgacaggcggaagacgagcccagaggcggagcagggccgtcgcgccttgg tgacgtctgccgccggcgcgggcgggtgacgcgactgggcccgttgtctgtgtgtgggactgaggggccc cgggggcggtgggggctcccggtgggggcagcggtggggagggagggcctggacatggcgctgaggggcc @@ -890,7 +890,7 @@ agagtccaggccctgggaatgggaggaaccaggccacattccttccatcgtgccctgaggcctgacacgg cagatcagccccatagtgctcaggaggcagcatctggagttggggcacagcgaggtactgcagcttcctc cacagccggctgtggagcagcaggacctggcccttctgcctgggcagcagaatatatattttacctatca gagacatctatttttctgggctccaacccaacatgccaccatgttgac ->gi|530364726|ref|XR_241081.1| PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X4, misc_RNA +>XR_241081.1 PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X4, misc_RNA gtgtgggaggccggaagttgcggcttcattactcgccatttcaaaatgctgccgaggccctaggatctgt gactgccacccctccccccacccgggctcggcgggggagcgactcatggagctgccgtaagttttaccaa cagactgcagtttcttcactaccaaaatgacatcattttccacctctgctcagtgttcaacatctgacag @@ -906,7 +906,7 @@ gtgattgaagtgggaaaaaatgatgacctggaggactctaagtccttaagtgatgataccgatgtagagg ttacctctgaggatgagtggcagtgtactgaatgcaagaaatttaactctccaagcaagaggtactgttt tcgttgttgggccttgaggaaggattggtattcagattgttcaaagttaacccattctctctccacgtct gatatcactgccatacctgaaaaggaaaa ->gi|530364725|ref|XR_241080.1| PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X3, misc_RNA +>XR_241080.1 PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X3, misc_RNA gtgtgggaggccggaagttgcggcttcattactcgccatttcaaaatgctgccgaggccctaggatctgt gactgccacccctccccccacccgggctcggcgggggagcgactcatggagctgccgtaagttttaccaa cagactgcagtttcttcactaccaaaatgacatcattttccacctctgctcagtgttcaacatctgacag @@ -977,7 +977,7 @@ aaaggggccagactgtgttgccttcttgagcctggtctgactcctgagtggaagtctgattccaggtaca tgagataagcactaatacctccagtttgcagattaagagactgaggtcctgaagaggttaaagaacttgg ctcaagtcacatagctggtgagcagcaagatacaagaatcaacccaagtccagggggctgtgtgccgttt acacttcacatctgtgctgccagggctgtagctataaaagcttgaaaaccatta ->gi|530364724|ref|XR_241079.1| PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X2, misc_RNA +>XR_241079.1 PREDICTED: Homo sapiens Mdm4 p53 binding protein homolog (mouse) (MDM4), transcript variant X2, misc_RNA aagttgcggcttcattactcgccatttcaaaatgctgccgaggccctaggatctgtgactgccacccctc cccccacccgggctcggcgggggagcgactcatggagctgccgtaagttttaccaacagactgcagtttc ttcactaccaaaatgacatcattttccacctctgctcagtgttcaacatctgacagtgcttgcaggatct diff --git a/tests/test_Fasta_bgzip.py b/tests/test_Fasta_bgzip.py index 27f18f0..3bec1c6 100644 --- a/tests/test_Fasta_bgzip.py +++ b/tests/test_Fasta_bgzip.py @@ -25,7 +25,6 @@ def tearDown(self): except EnvironmentError: pass # some tests may delete this file - @expectedFailure def test_build_issue_126(self): """ Samtools BGZF index should be identical to pyfaidx BGZF index """ expect_index = ("gi|563317589|dbj|AB821309.1| 3510 114 70 71\n" @@ -121,6 +120,7 @@ def test_fetch_whole_entry(self): 'TACATTCTAGAGAAGGTGATTGAAGTGGGAAAAAATGATGACCTGGAGGACTC') result = faidx.fetch('gi|557361099|gb|KF435150.1|', 1, 481) + print(result) assert str(result) == expect def test_fetch_middle(self): @@ -185,7 +185,9 @@ def test_fetch_keyerror(self): def test_blank_string(self): """ seq[0:0] should return a blank string mdshw5/pyfaidx#53 """ fasta = Fasta('data/genes.fasta.gz', as_raw=True) - assert fasta['gi|557361099|gb|KF435150.1|'][0:0] == '' + result = fasta['gi|557361099|gb|KF435150.1|'][0:0] + print(result) + assert result == '' def test_slice_from_beginning(self): fasta = Fasta('data/genes.fasta.gz', as_raw=True)