track

Contains the Track class and plotting functions

source

Track

 Track (ylim:tuple=None, height:int=200, tools:str='xwheel_zoom,
        ywheel_zoom, pan, box_zoom, save, reset', **kwargs)

Track objects should only be created through GenomeBrowser.add_track

Type Default Details
ylim tuple None limits of the y-axis. If not specified, ylim is set automatically from the min and max of the data plotted with Track.line, Track.scatter or Track.bar
height int 200 height of the track, in pixels
tools str xwheel_zoom, ywheel_zoom, pan, box_zoom, save, reset comma separated list of Bokeh tools that can be used to navigate the plot
kwargs

Adding a track with random points as a demonstration. genomeNotebook uses the Bokeh library, and track.fig is a plain Bokeh figure on which you can plot anything you want using the Bokeh API.

from genomenotebook.browser import GenomeBrowser
from genomenotebook.data import get_example_data_dir
import os
import numpy as np
data_path = get_example_data_dir()
fasta_path = os.path.join(data_path, "MG1655_U00096.fasta")
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

source

Track.set_figure_data_source

 Track.set_figure_data_source (fig, pos, loaded_range)

source

Track.set_track_data_source

 Track.set_track_data_source (data:pandas.core.frame.DataFrame, pos,
                              columns:List[str])
Type Details
data DataFrame data to be plotted
pos
columns List columns to store as data

source

Track.line

 Track.line (data:pandas.core.frame.DataFrame, pos:str, y:str,
             hover_data:List[str]=None, **kwargs)
Type Default Details
data DataFrame pandas DataFrame containing the data
pos str name of the column containing the positions along the genome
y str name of the column containing the data to be plotted on the y-axis
hover_data List None list of column names to be shown when hovering over the data
kwargs

Additional kwargs are passed as is to bokeh.plotting.figure.line

Plotting some ChIP-seq data

g=GenomeBrowser(fasta_path=fasta_path, 
                gff_path=gff_path, 
                init_pos=50000,
                bounds=(30000,85000), 
                search=False, 
                show_seq=False)

#Importing some coverage data from a BigWig file
bw_file_path=os.path.join(data_path,"ChIP-ACCCA-1.bw")
refname='NC_000913'
with pyBigWig.open(bw_file_path) as bw:
    cov=bw.values(refname,0,g.seq_len,numpy=True)
    
data=pd.DataFrame({"pos": np.arange(0,g.seq_len,10),
                     "cov": cov[::10]})

track=g.add_track()
track.line(data,pos="pos",y="cov", 
           line_color="blue",
           line_width=2)

g.show()

source

Track.scatter

 Track.scatter (data:pandas.core.frame.DataFrame, pos:str, y:str,
                factors:str=None, hover_data:List=None, **kwargs)
Type Default Details
data DataFrame pandas DataFrame containing the data
pos str name of the column containing the positions along the genome
y str name of the column containing the data to be plotted on the y-axis
factors str None name of a column of values to be used as factors
hover_data List None list of additional column names to be shown when hovering over the data
kwargs

Additional kwargs are passed as is to bokeh.plotting.figure.scatter

Plotting some CRISPR screen data

import pandas as pd
#Opening the Cui 2018 CRISPRi screen data
cui2018data="https://gitlab.pasteur.fr/dbikard/badSeed_public/raw/master/screen_data.csv"
cui2018data=pd.read_csv(cui2018data)
cui2018data.head()
guide gene essential pos ori coding fit18 fit75 ntargets seq
0 AAAAAACCTGCTGGTGAGGC NaN NaN 2202483 - NaN -4.850012 -1.437546 1 AAAGCAGATCACAGTAAATAAAAAAACCTGCTGGTGAGGCAGGTTC...
1 AAAAAACGTATTCGCTTGCA curA False 1517891 + False -0.094026 -0.100313 1 TGTTGATGGCTACAGTGCTGAAAAAACGTATTCGCTTGCAAGGTTT...
2 AAAAAAGCGCACTTTTTGAC NaN NaN 1919717 + NaN -1.109310 -0.246740 1 GTAACGCCTGACAGCGCACAAAAAAAGCGCACTTTTTGACTGGCAC...
3 AAAAAAGCGGTGACTTACGA bglA False 3042929 + False -1.328831 -0.905068 1 GCGCCCATATCGAAGAGATGAAAAAAGCGGTGACTTACGATGGCGT...
4 AAAAAATCTGCCCGTGTCGT gyrA True 2337231 - False -0.840373 -0.598858 1 ATGACTGGAACAAAGCCTATAAAAAATCTGCCCGTGTCGTTGGTGA...
g=GenomeBrowser(fasta_path=fasta_path, gff_path=gff_path, bounds=(0,100000), search=False, show_seq=False)

track=g.add_track(height=150)
track.scatter(data=cui2018data,pos="pos",y="fit75",factors="ori", hover_data=["guide"])

track2=g.add_track(height=150)
track2.scatter(data=cui2018data,pos="pos",y="fit18",factors="ori")
g.show()

source

Track.bar

 Track.bar (data:pandas.core.frame.DataFrame, pos:str, y:str,
            factors:str=None, hover_data:List=None, **kwargs)
Type Default Details
data DataFrame pandas DataFrame containing the data
pos str name of the column containing the positions along the genome
y str name of the column containing the data to be plotted on the y-axis
factors str None name of a column of values to be used as factors
hover_data List None list of additional column names to be shown when hovering over the data
kwargs

Additional kwargs are passed as is to bokeh.plotting.figure.vbar

Showing the same data as vertical bars

g=GenomeBrowser(fasta_path=fasta_path, gff_path=gff_path, bounds=(0,100000))
track=g.add_track()
track.bar(data=cui2018data,pos="pos",y="fit75",factors="ori")
g.show()

source

Track.custom

 Track.custom (func:Callable=None)
Type Default Details
func Callable None function to be called; it receives the track's Bokeh figure as its first argument
g=GenomeBrowser(gff_path=gff_path, bounds=(0,100000), search=False)
track = g.add_track()

x= np.arange(0,100000,100)
y= np.random.randint(0,10,size=x.shape)
track.custom(lambda fig: fig.scatter(x=x,y=y))
g.show()

source

Track.highlight

 Track.highlight (data:pandas.core.frame.DataFrame=None,
                  left_col:str='left', right_col:str='right',
                  color_col:str='color', alpha_col:str='alpha', left=None,
                  right=None, color='green', alpha:str=0.2,
                  hover_data:List[str]=None, **kwargs)
Type Default Details
data DataFrame None pandas DataFrame containing the data
left_col str left name of the column containing the start positions of the regions
right_col str right name of the column containing the end positions of the regions
color_col str color name of the column containing color of the regions
alpha_col str alpha name of the column containing alpha of the regions
left NoneType None
right NoneType None
color str green
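alpha str 0.2 transparency of the regions (a numeric value such as the default 0.2, even though the signature annotates it as str)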
hover_data List None list of additional column names to be shown when hovering over the data
kwargs

Additional kwargs are passed as is to bokeh.models.Rect

g=GenomeBrowser(fasta_path=fasta_path, gff_path=gff_path, bounds=(0,10000), search=False)
track=g.add_track()
track.scatter(data=cui2018data,pos="pos",y="fit75",factors="ori")
highlight_regions=pd.DataFrame({"left": [5000, 8000], "right": [6000, 8500], "color": ["red","green"]})
track.highlight(data=highlight_regions, left_col="left", right_col="right", color_col="color")
g.show()
g=GenomeBrowser(fasta_path=fasta_path, gff_path=gff_path, bounds=(0,10000), search=False)
track=g.add_track()
track.scatter(data=cui2018data,pos="pos",y="fit75",factors="ori")

highlight_regions=pd.DataFrame({"left": [5000, 8000], "right": [6000, 8500], "color": ["red","green"]})

g.highlight(data=highlight_regions, left="left", right="right", color="color", highlight_tracks=True)
g.show()