track

Contains the Track class and plotting functions

Track

 Track (ylim:tuple=None, height:int=200, tools:str='xwheel_zoom,
        ywheel_zoom, pan, box_zoom, save, reset', output_backend='webgl',
        **kwargs)

Track objects should not be instantiated directly; create them with GenomeBrowser.add_track.

	Type	Default	Details
ylim	tuple	None	limits of the y axis. If not specified, ylim is set automatically to the minimum and maximum of the data plotted with Track.line, Track.scatter or Track.bar
height	int	200	height of the track
tools	str	xwheel_zoom, ywheel_zoom, pan, box_zoom, save, reset	comma separated list of Bokeh tools that can be used to navigate the plot
output_backend	str	webgl	Bokeh output backend used to render the track figure (Bokeh supports canvas, svg and webgl)
kwargs

The example below adds a track with random points as a demonstration. genomeNotebook is built on the Bokeh library: track.fig is a regular Bokeh figure, so you can plot anything you want on it using the Bokeh API.

from genomenotebook.browser import GenomeBrowser
from genomenotebook.data import get_example_data_dir
import os
import numpy as np

data_path = get_example_data_dir()
genome_path = os.path.join(data_path, "MG1655_U00096.fasta")
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,100000), search=False, show_seq=False,toolbar_location="above")

track = g.add_track(toolbar_location="above")

x= np.arange(0,100000,100)
y= np.random.randint(0,10,size=x.shape)
track.fig.scatter(x=x,y=y)
g.show()

source

Track.line

 Track.line (data:pandas.core.frame.DataFrame, pos:str, y:str,
             hover_data:list=[], **kwargs)

	Type	Default	Details
data	DataFrame		pandas DataFrame containing the data
pos	str		name of the column containing the positions along the genome
y	str		name of the column containing the data to be plotted on the y-axis
hover_data	list	[]	list of column names to be shown when hovering over the data
kwargs

Additional kwargs are passed as is to bokeh.plotting.figure.line

Plotting some ChIP-seq data

g=GenomeBrowser(genome_path=genome_path, 
                gff_path=gff_path, 
                init_pos=50000,
                bounds=(30000,85000), 
                search=False, 
                show_seq=False)

import pandas as pd
import pyBigWig

#Importing some coverage data from a BigWig file
bw_file_path=os.path.join(data_path,"ChIP-ACCCA-1.bw")
refname='NC_000913'
with pyBigWig.open(bw_file_path) as bw:
    cov=bw.values(refname,0,g.seq_len,numpy=True)
    
data=pd.DataFrame({"pos": np.arange(0,g.seq_len,10),
                     "cov": cov[::10]})

track=g.add_track()
track.line(data,pos="pos",y="cov", 
           line_color="blue",
           line_width=2)

g.show()

Note: when this page was generated, the example above produced the following error output:

AttributeError: 'Track' object has no attribute 'ylim'

source

Track.scatter

 Track.scatter (data:pandas.core.frame.DataFrame, pos:str, y:str,
                factors:str=None, hover_data:list=[], **kwargs)

	Type	Default	Details
data	DataFrame		pandas DataFrame containing the data
pos	str		name of the column containing the positions along the genome
y	str		name of the column containing the data to be plotted on the y-axis
factors	str	None	name of a column of values to be used as factors
hover_data	list	[]	list of additional column names to be shown when hovering over the data
kwargs

Additional kwargs are passed as is to bokeh.plotting.figure.scatter

Plotting some CRISPR screen data

import pandas as pd

#Opening the Cui 2018 CRISPRi screen data
cui2018data="https://gitlab.pasteur.fr/dbikard/badSeed_public/raw/master/screen_data.csv"
cui2018data=pd.read_csv(cui2018data)
cui2018data.head()

	guide	gene	essential	pos	ori	coding	fit18	fit75	ntargets	seq
0	AAAAAACCTGCTGGTGAGGC	NaN	NaN	2202483	-	NaN	-4.850012	-1.437546	1	AAAGCAGATCACAGTAAATAAAAAAACCTGCTGGTGAGGCAGGTTC...
1	AAAAAACGTATTCGCTTGCA	curA	False	1517891	+	False	-0.094026	-0.100313	1	TGTTGATGGCTACAGTGCTGAAAAAACGTATTCGCTTGCAAGGTTT...
2	AAAAAAGCGCACTTTTTGAC	NaN	NaN	1919717	+	NaN	-1.109310	-0.246740	1	GTAACGCCTGACAGCGCACAAAAAAAGCGCACTTTTTGACTGGCAC...
3	AAAAAAGCGGTGACTTACGA	bglA	False	3042929	+	False	-1.328831	-0.905068	1	GCGCCCATATCGAAGAGATGAAAAAAGCGGTGACTTACGATGGCGT...
4	AAAAAATCTGCCCGTGTCGT	gyrA	True	2337231	-	False	-0.840373	-0.598858	1	ATGACTGGAACAAAGCCTATAAAAAATCTGCCCGTGTCGTTGGTGA...

g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,100000), search=False, show_seq=False)

track=g.add_track(height=100)
track.scatter(data=cui2018data,pos="pos",y="fit75",factors="ori", hover_data=["guide"])

track2=g.add_track(height=100)
track2.scatter(data=cui2018data,pos="pos",y="fit18",factors="ori")
g.show()

source

Track.bar

 Track.bar (data:pandas.core.frame.DataFrame, pos:str, y:str,
            factors:str=None, hover_data:list=[], **kwargs)

	Type	Default	Details
data	DataFrame		pandas DataFrame containing the data
pos	str		name of the column containing the positions along the genome
y	str		name of the column containing the data to be plotted on the y-axis
factors	str	None	name of a column of values to be used as factors
hover_data	list	[]	list of additional column names to be shown when hovering over the data
kwargs

Additional kwargs are passed as is to bokeh.plotting.figure.vbar

Showing the same data as vertical bars

g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,100000))
track=g.add_track()
track.bar(data=cui2018data,pos="pos",y="fit75",factors="ori")
g.show()

source

Track.highlight

 Track.highlight (data:pandas.core.frame.DataFrame, left:str='left',
                  right:str='right', color:str='color', alpha:str=0.2,
                  hover_data:list=[], **kwargs)

	Type	Default	Details
data	DataFrame		pandas DataFrame containing the data
left	str	left	name of the column containing the start positions of the regions
right	str	right	name of the column containing the end positions of the regions
color	str	color	name of the column containing the color of each region
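alpha	float	0.2	transparency of the highlighted regions (the signature annotates this parameter as str, but the default is the number 0.2)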
hover_data	list	[]	list of additional column names to be shown when hovering over the data
kwargs

Additional kwargs are passed as is to bokeh.models.Rect

g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,10000), search=False)
track=g.add_track()
track.scatter(data=cui2018data,pos="pos",y="fit75",factors="ori")
highlight_regions=pd.DataFrame({"left": [5000, 8000], "right": [6000, 8500], "color": ["red","green"]})
track.highlight(data=highlight_regions, left="left", right="right", color="color")
g.show()

g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,10000), search=False)
track=g.add_track()
track.scatter(data=cui2018data,pos="pos",y="fit75",factors="ori")

highlight_regions=pd.DataFrame({"left": [5000, 8000], "right": [6000, 8500], "color": ["red","green"]})

g.highlight(data=highlight_regions, left="left", right="right", color="color", highlight_tracks=True)
g.show()