Skip to content

Maybe csi index is better? #112

@zoomlion

Description

@zoomlion

Hi, I'm using CoolBox to draw interaction plots for Metatheria genomes. However, for some chromosomes it always raises an error saying that the tbi index cannot be generated for the gtf/bed files. After I modified the code to build a csi index instead, it works. Would switching to the csi index be a better alternative for chromosomes larger than 512 Mb?

def build_bed_index(file):
    """Return the bgzipped BED path for ``file``, building a CSI index if missing.

    ``file`` is passed through ``osp.expanduser`` first. A path that already
    ends in ``.bgz`` is used as-is; any other path is compressed to
    ``<file>.bgz`` via ``bgz_bed`` on every call. ``index_bed`` is invoked
    only when ``<bgz_file>.csi`` does not exist yet.

    Returns:
        The path of the bgzip-compressed file.
    """
    src = osp.expanduser(file)
    is_compressed = src.endswith(".bgz")
    bgz_file = src if is_compressed else src + '.bgz'

    if not is_compressed:
        log.info(f"Bgzip bed file, save to {bgz_file}")
        bgz_bed(src, bgz_file)

    # An existing index is reused; nothing more to do.
    if osp.exists(bgz_file + '.csi'):
        return bgz_file

    log.info(f"Make csi of bgz file, save to {bgz_file}.csi")
    index_bed(bgz_file)
    return bgz_file

def index_bed(bgz_path):
    """Run ``tabix`` to build a CSI index for a bgzip-compressed BED file.

    Args:
        bgz_path: Path of the bgzip-compressed BED file to index.

    Raises:
        Whatever ``subp.check_call`` raises when the command fails
        (presumably ``subprocess.CalledProcessError`` -- confirm that ``subp``
        is the ``subprocess`` module).
    """
    # Options go before the file operand: tabix treats operands after the
    # file as region queries, so a trailing '-C' only works when getopt
    # permutes argv (it does not when POSIXLY_CORRECT is set).
    cmd = ['tabix', '-C', '-p', 'bed', bgz_path]
    subp.check_call(cmd)

...

def tabix_index(filename, preset="gff"):
    """Call tabix to create a CSI index for a bgzip-compressed file.

    Args:
        filename: Path of the bgzip-compressed file to index.
        preset: Value passed to tabix's ``-p`` option; defaults to ``"gff"``.

    Raises:
        Whatever ``subp.check_call`` raises when the command fails
        (presumably ``subprocess.CalledProcessError`` -- confirm that ``subp``
        is the ``subprocess`` module).
    """
    # Keep '-C' ahead of the file operand: tabix treats operands after the
    # file as region queries, so a trailing option only works when getopt
    # permutes argv (it does not when POSIXLY_CORRECT is set).
    subp.check_call([
        'tabix', '-C', '-p', preset, filename
    ])

def build_gtf_index(file):
    """Return the bgzipped GTF path for ``file``, creating it and its CSI index if missing.

    ``file`` is passed through ``osp.expanduser`` first, then handled by suffix:

    * ``.gtf``     -> ``<file>.bgz``, produced by ``process_gtf`` if absent.
    * ``.gtf.gz``  -> same name with ``.gz`` replaced by ``.bgz``, produced
      by ``gtf_gz_to_bgz`` if absent.
    * ``.gtf.bgz`` -> used as-is.

    ``tabix_index`` is then called when ``<bgz_file>.csi`` does not exist.

    Returns:
        The path of the bgzip-compressed GTF file.

    Raises:
        IOError: If ``file`` has none of the supported suffixes.
    """
    file = osp.expanduser(file)
    if file.endswith(".gtf"):
        bgz_file = file + ".bgz"
        if not osp.exists(bgz_file):
            log.info(f"Process the gtf and do bgzip, save to {bgz_file}.")
            process_gtf(file, bgz_file)
    elif file.endswith(".gtf.gz"):
        # Slice off the exact ".gz" suffix. str.rstrip(".gz") strips a
        # *character set*, which only happened to work because ".gtf" ends
        # in a character outside that set.
        bgz_file = file[:-len(".gz")] + ".bgz"
        if not osp.exists(bgz_file):
            # Log only when a conversion actually happens.
            log.info(f"Convert .gtf.gz to .gtf.bgz, save to {bgz_file}.")
            gtf_gz_to_bgz(file, bgz_file)
    elif file.endswith(".gtf.bgz"):
        bgz_file = file
    else:
        raise IOError(f"GTF track only support GTF file(.gtf or .gtf.gz), got {file}.")

    idx_file = bgz_file + ".csi"
    if not osp.exists(idx_file):
        log.info(f"Tabix index not found, build it in {idx_file}")
        tabix_index(bgz_file)
    return bgz_file

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions