Source code for pybedtools.contrib.bigwig

"""
Module to help create scaled bigWig files from BAM
"""
import pybedtools
import os
import subprocess


def mapped_read_count(bam, force=False):
    """
    Scale is cached in a bam.scale file containing the number of mapped reads.
    Use force=True to override caching.
    """
    scale_fn = bam + '.scale'
    if os.path.exists(scale_fn) and not force:
        for line in open(scale_fn):
            if line.startswith('#'):
                continue
            readcount = float(line.strip())
            return readcount

    cmds = ['samtools',
            'view',
            '-c',
            '-F', '0x4',
            bam]
    p = subprocess.Popen(cmds, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    stdout, stderr = p.communicate()
    if stderr:
        raise ValueError('samtools says: %s' % stderr)

    readcount = float(stdout)

    # write to file so the next time you need the lib size you can access
    # it quickly
    if not os.path.exists(scale_fn):
        fout = open(scale_fn, 'w')
        fout.write(str(readcount) + '\n')
        fout.close()
    return readcount


[docs]def bedgraph_to_bigwig(bedgraph, genome, output): genome_file = pybedtools.chromsizes_to_file(pybedtools.chromsizes(genome)) cmds = [ 'bedGraphToBigWig', bedgraph.fn, genome_file, output] os.system(' '.join(cmds)) return output
[docs]def wig_to_bigwig(wig, genome, output): genome_file = pybedtools.chromsizes_to_file(pybedtools.chromsizes(genome)) cmds = [ 'wigToBigWig', wig.fn, genome_file, output] os.system(' '.join(cmds)) return output
[docs]def bam_to_bigwig(bam, genome, output, scale=False): """ Given a BAM file `bam` and assembly `genome`, create a bigWig file scaled such that the values represent scaled reads -- that is, reads per million mapped reads. (Disable this scaling step with scale=False; in this case values will indicate number of reads) Assumes that `bedGraphToBigWig` from UCSC tools is installed; see http://genome.ucsc.edu/goldenPath/help/bigWig.html for more details on the format. """ genome_file = pybedtools.chromsizes_to_file(pybedtools.chromsizes(genome)) kwargs = dict(bg=True, split=True, g=genome_file) if scale: readcount = mapped_read_count(bam) _scale = 1 / (readcount / 1e6) kwargs['scale'] = _scale x = pybedtools.BedTool(bam).genome_coverage(**kwargs) cmds = [ 'bedGraphToBigWig', x.fn, genome_file, output] os.system(' '.join(cmds))