track

Contains the Track class and plotting functions

source

Track

 Track (ylim:tuple=None, height:int=200, tools:str='xwheel_zoom,
        ywheel_zoom, pan, box_zoom, save, reset', output_backend='webgl',
        **kwargs)

Track objects should only be created through GenomeBrowser.add_track

| | Type | Default | Details |
|---|---|---|---|
| ylim | tuple | None | limits of the y axis. If not specified, ylim is set automatically to the min and max of the data plotted with Track.line, Track.scatter or Track.bar |
| height | int | 200 | size of the track |
| tools | str | xwheel_zoom, ywheel_zoom, pan, box_zoom, save, reset | comma-separated list of Bokeh tools that can be used to navigate the plot |
| output_backend | str | webgl | |
| kwargs | | | |

Adding a track with random points as a demonstration. genomeNotebook uses the Bokeh library, and track.fig is a plain Bokeh figure, so you can plot anything you want on it using Bokeh.

from genomenotebook.browser import GenomeBrowser
from genomenotebook.data import get_example_data_dir
import os
import numpy as np
data_path = get_example_data_dir()
genome_path = os.path.join(data_path, "MG1655_U00096.fasta")
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,100000), search=False, show_seq=False,toolbar_location="above")

track = g.add_track(toolbar_location="above")

x= np.arange(0,100000,100)
y= np.random.randint(0,10,size=x.shape)
track.fig.scatter(x=x,y=y)
g.show()

source

Track.line

 Track.line (data:pandas.core.frame.DataFrame, pos:str, y:str,
             hover_data:list=[], **kwargs)
| | Type | Default | Details |
|---|---|---|---|
| data | DataFrame | | pandas DataFrame containing the data |
| pos | str | | name of the column containing the positions along the genome |
| y | str | | name of the column containing the data to be plotted on the y-axis |
| hover_data | list | [] | list of column names to be shown when hovering over the data |
| kwargs | | | |

Additional kwargs are passed as is to bokeh.plotting.figure.line

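Plotting some ChIP-seq data. This example additionally requires `import pandas as pd` and `import pyBigWig`, which are not imported in the code above.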

g=GenomeBrowser(genome_path=genome_path, 
                gff_path=gff_path, 
                init_pos=50000,
                bounds=(30000,85000), 
                search=False, 
                show_seq=False)

#Importing some coverage data from a BigWig file
bw_file_path=os.path.join(data_path,"ChIP-ACCCA-1.bw")
refname='NC_000913'
with pyBigWig.open(bw_file_path) as bw:
    cov=bw.values(refname,0,g.seq_len,numpy=True)
    
data=pd.DataFrame({"pos": np.arange(0,g.seq_len,10),
                     "cov": cov[::10]})

track=g.add_track()
track.line(data,pos="pos",y="cov", 
           line_color="blue",
           line_width=2)

g.show()
AttributeError: 'Track' object has no attribute 'ylim'

source

Track.scatter

 Track.scatter (data:pandas.core.frame.DataFrame, pos:str, y:str,
                factors:str=None, hover_data:list=[], **kwargs)
| | Type | Default | Details |
|---|---|---|---|
| data | DataFrame | | pandas DataFrame containing the data |
| pos | str | | name of the column containing the positions along the genome |
| y | str | | name of the column containing the data to be plotted on the y-axis |
| factors | str | None | name of a column of values to be used as factors |
| hover_data | list | [] | list of additional column names to be shown when hovering over the data |
| kwargs | | | |

Additional kwargs are passed as is to bokeh.plotting.figure.scatter

Plotting some CRISPR screen data

import pandas as pd
#Opening the Cui 2018 CRISPRi screen data
cui2018data="https://gitlab.pasteur.fr/dbikard/badSeed_public/raw/master/screen_data.csv"
cui2018data=pd.read_csv(cui2018data)
cui2018data.head()
guide gene essential pos ori coding fit18 fit75 ntargets seq
0 AAAAAACCTGCTGGTGAGGC NaN NaN 2202483 - NaN -4.850012 -1.437546 1 AAAGCAGATCACAGTAAATAAAAAAACCTGCTGGTGAGGCAGGTTC...
1 AAAAAACGTATTCGCTTGCA curA False 1517891 + False -0.094026 -0.100313 1 TGTTGATGGCTACAGTGCTGAAAAAACGTATTCGCTTGCAAGGTTT...
2 AAAAAAGCGCACTTTTTGAC NaN NaN 1919717 + NaN -1.109310 -0.246740 1 GTAACGCCTGACAGCGCACAAAAAAAGCGCACTTTTTGACTGGCAC...
3 AAAAAAGCGGTGACTTACGA bglA False 3042929 + False -1.328831 -0.905068 1 GCGCCCATATCGAAGAGATGAAAAAAGCGGTGACTTACGATGGCGT...
4 AAAAAATCTGCCCGTGTCGT gyrA True 2337231 - False -0.840373 -0.598858 1 ATGACTGGAACAAAGCCTATAAAAAATCTGCCCGTGTCGTTGGTGA...
g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,100000), search=False, show_seq=False)

track=g.add_track(height=100)
track.scatter(data=cui2018data,pos="pos",y="fit75",factors="ori", hover_data=["guide"])

track2=g.add_track(height=100)
track2.scatter(data=cui2018data,pos="pos",y="fit18",factors="ori")
g.show()

source

Track.bar

 Track.bar (data:pandas.core.frame.DataFrame, pos:str, y:str,
            factors:str=None, hover_data:list=[], **kwargs)
| | Type | Default | Details |
|---|---|---|---|
| data | DataFrame | | pandas DataFrame containing the data |
| pos | str | | name of the column containing the positions along the genome |
| y | str | | name of the column containing the data to be plotted on the y-axis |
| factors | str | None | name of a column of values to be used as factors |
| hover_data | list | [] | list of additional column names to be shown when hovering over the data |
| kwargs | | | |

Additional kwargs are passed as is to bokeh.plotting.figure.vbar

Showing the same data as vertical bars

g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,100000))
track=g.add_track()
track.bar(data=cui2018data,pos="pos",y="fit75",factors="ori")
g.show()

source

Track.highlight

 Track.highlight (data:pandas.core.frame.DataFrame, left:str='left',
                  right:str='right', color:str='color', alpha:str=0.2,
                  hover_data:list=[], **kwargs)
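| | Type | Default | Details |
|---|---|---|---|
| data | DataFrame | | pandas DataFrame containing the data |
| left | str | left | name of the column containing the start positions of the regions |
| right | str | right | name of the column containing the end positions of the regions |
| color | str | color | name of the column containing the color of each region |
| alpha | str | 0.2 | transparency of the regions (a number; the default is the float 0.2, although the signature annotates it as str) |
| hover_data | list | [] | list of additional column names to be shown when hovering over the data |
| kwargs | | | |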

Additional kwargs are passed as is to bokeh.models.Rect

g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,10000), search=False)
track=g.add_track()
track.scatter(data=cui2018data,pos="pos",y="fit75",factors="ori")
highlight_regions=pd.DataFrame({"left": [5000, 8000], "right": [6000, 8500], "color": ["red","green"]})
track.highlight(data=highlight_regions, left="left", right="right", color="color")
g.show()
g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,10000), search=False)
track=g.add_track()
track.scatter(data=cui2018data,pos="pos",y="fit75",factors="ori")

highlight_regions=pd.DataFrame({"left": [5000, 8000], "right": [6000, 8500], "color": ["red","green"]})

g.highlight(data=highlight_regions, left="left", right="right", color="color", highlight_tracks=True)
g.show()