-
Notifications
You must be signed in to change notification settings - Fork 40
Open
Description
Hi, I'm currently using CoolBox to draw interaction plots for metatherian genomes. I found that some chromosomes always raise an error saying that a tbi index cannot be generated for the gtf/bed files. I modified the code to build a csi index instead, and then it works. Would switching to the csi index be a better alternative for chromosomes larger than 512 Mb?
def build_bed_index(file):
    """Make sure a ``.bgz`` copy of a BED file and its ``.csi`` index exist.

    Args:
        file: Path to the BED file; ``~`` is expanded. A path that already
            ends with ``.bgz`` is used as-is, anything else is handed to
            ``bgz_bed`` to produce ``<file>.bgz``.

    Returns:
        The path of the ``.bgz`` file.
    """
    file = osp.expanduser(file)
    if file.endswith(".bgz"):
        bgz_file = file
    else:
        bgz_file = file + '.bgz'
        # Reuse an existing .bgz instead of rewriting it on every call:
        # rewriting would leave a previously built .csi describing a file
        # it was not generated from, and it repeats work needlessly.
        if not osp.exists(bgz_file):
            log.info(f"Bgzip bed file, save to {bgz_file}")
            bgz_bed(file, bgz_file)
    idx_file = bgz_file + '.csi'
    if not osp.exists(idx_file):
        log.info(f"Make csi of bgz file, save to {idx_file}")
        index_bed(bgz_file)
    return bgz_file
def index_bed(bgz_path):
    """Invoke ``tabix`` with the ``bed`` preset and the ``-C`` flag on *bgz_path*.

    The command is run through ``subp.check_call``, so a non-zero exit
    status surfaces as an exception (assuming ``subp`` is ``subprocess``).
    """
    tabix_args = ['-p', 'bed', bgz_path, '-C']
    subp.check_call(['tabix', *tabix_args])
...
def tabix_index(filename, preset="gff"):
    """Run the ``tabix`` executable to index a bgzip-compressed file.

    Args:
        filename: Path of the compressed file passed to ``tabix``.
        preset: Value given to ``tabix -p``; defaults to ``"gff"``.
    """
    command = ['tabix', '-p', preset, filename, '-C']
    subp.check_call(command)
def build_gtf_index(file):
    """Make sure a ``.gtf.bgz`` file and its ``.csi`` index exist for a GTF.

    Args:
        file: Path to the annotation file; ``~`` is expanded. Accepted
            suffixes are ``.gtf`` (passed to ``process_gtf``), ``.gtf.gz``
            (passed to ``gtf_gz_to_bgz``) and ``.gtf.bgz`` (used as-is).

    Returns:
        The path of the ``.gtf.bgz`` file.

    Raises:
        IOError: If *file* has none of the accepted suffixes.
    """
    file = osp.expanduser(file)
    if file.endswith(".gtf"):
        bgz_file = file + ".bgz"
        if not osp.exists(bgz_file):
            log.info(f"Process the gtf and do bgzip, save to {bgz_file}.")
            process_gtf(file, bgz_file)
    elif file.endswith(".gtf.gz"):
        # Remove exactly the ".gz" suffix. str.rstrip(".gz") strips any
        # trailing run of the characters '.', 'g', 'z' rather than a suffix.
        bgz_file = file[:-len(".gz")] + ".bgz"
        if not osp.exists(bgz_file):
            # Log only when a conversion really happens.
            log.info(f"Convert .gtf.gz to .gtf.bgz, save to {bgz_file}.")
            gtf_gz_to_bgz(file, bgz_file)
    elif file.endswith(".gtf.bgz"):
        bgz_file = file
    else:
        raise IOError(f"GTF track only support GTF file(.gtf or .gtf.gz), got {file}.")
    idx_file = bgz_file + ".csi"
    if not osp.exists(idx_file):
        log.info(f"Tabix index not found, build it in {idx_file}")
        tabix_index(bgz_file)
    return bgz_file
Metadata
Metadata
Assignees
Labels
No labels