from genomenotebook.browser import GenomeBrowser
from genomenotebook.data import get_example_data_dir
import os
import numpy as np
track
Contains the Track class and plotting functions
Track
Track (ylim:tuple=None, height:int=200, tools:str='xwheel_zoom, ywheel_zoom, pan, box_zoom, save, reset', **kwargs)
Track objects should only be created through GenomeBrowser.add_track
| | Type | Default | Details |
|---|---|---|---|
| ylim | tuple | None | limits of the y axis. If not specified, ylim will be set automatically with the max and min of the data plotted with Track.line, Track.scatter or Track.bar |
| height | int | 200 | size of the track |
| tools | str | xwheel_zoom, ywheel_zoom, pan, box_zoom, save, reset | comma separated list of Bokeh tools that can be used to navigate the plot |
| kwargs | VAR_KEYWORD | | |
Adding a track with random points as a demonstration. genomeNotebook uses the Bokeh library and track.fig is a simple Bokeh figure on which you can plot anything you want using Bokeh.
data_path = get_example_data_dir()
fasta_path = os.path.join(data_path, "MG1655_U00096.fasta")
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")
Track.set_figure_data_source
Track.set_figure_data_source (fig, pos, loaded_range)
Track.set_track_data_source
Track.set_track_data_source (data:pandas.core.frame.DataFrame, pos, columns:List[str])
| | Type | Details |
|---|---|---|
| data | DataFrame | data to be plotted |
| pos | | |
| columns | List | columns to store as data |
Track.line
Track.line (data:pandas.core.frame.DataFrame, pos:str, y:str, hover_data:List[str]=None, **kwargs)
| | Type | Default | Details |
|---|---|---|---|
| data | DataFrame | | pandas DataFrame containing the data |
| pos | str | | name of the column containing the positions along the genome |
| y | str | | name of the column containing the data to be plotted on the y-axis |
| hover_data | List | None | list of column names to be shown when hovering over the data |
| kwargs | VAR_KEYWORD | | |
Additional kwargs are passed as is to bokeh.plotting.figure.line
Plotting some ChIP-seq data
g=GenomeBrowser(fasta_path=fasta_path,
gff_path=gff_path,
init_pos=50000,
bounds=(30000,85000),
search=False,
show_seq=False)
#Importing some coverage data from a BigWig file
bw_file_path=os.path.join(data_path,"ChIP-ACCCA-1.bw")
refname='NC_000913'
with pyBigWig.open(bw_file_path) as bw:
    cov=bw.values(refname,0,g.seq_len,numpy=True)
data=pd.DataFrame({"pos": np.arange(0,g.seq_len,10),
"cov": cov[::10]})
track=g.add_track()
track.line(data,pos="pos",y="cov",
line_color="blue",
line_width=2)
g.show()
Track.scatter
Track.scatter (data:pandas.core.frame.DataFrame, pos:str, y:str, factors:str=None, hover_data:List=None, **kwargs)
| | Type | Default | Details |
|---|---|---|---|
| data | DataFrame | | pandas DataFrame containing the data |
| pos | str | | name of the column containing the positions along the genome |
| y | str | | name of the column containing the data to be plotted on the y-axis |
| factors | str | None | name of a column of values to be used as factors |
| hover_data | List | None | list of additional column names to be shown when hovering over the data |
| kwargs | VAR_KEYWORD | | |
Additional kwargs are passed as is to bokeh.plotting.figure.scatter
Plotting some CRISPR screen data
import pandas as pd
#Opening the Cui 2018 CRISPRi screen data
cui2018data="https://gitlab.pasteur.fr/dbikard/badSeed_public/raw/master/screen_data.csv"
cui2018data=pd.read_csv(cui2018data)
cui2018data.head()
| | guide | gene | essential | pos | ori | coding | fit18 | fit75 | ntargets | seq |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AAAAAACCTGCTGGTGAGGC | NaN | NaN | 2202483 | - | NaN | -4.850012 | -1.437546 | 1 | AAAGCAGATCACAGTAAATAAAAAAACCTGCTGGTGAGGCAGGTTC... |
| 1 | AAAAAACGTATTCGCTTGCA | curA | False | 1517891 | + | False | -0.094026 | -0.100313 | 1 | TGTTGATGGCTACAGTGCTGAAAAAACGTATTCGCTTGCAAGGTTT... |
| 2 | AAAAAAGCGCACTTTTTGAC | NaN | NaN | 1919717 | + | NaN | -1.109310 | -0.246740 | 1 | GTAACGCCTGACAGCGCACAAAAAAAGCGCACTTTTTGACTGGCAC... |
| 3 | AAAAAAGCGGTGACTTACGA | bglA | False | 3042929 | + | False | -1.328831 | -0.905068 | 1 | GCGCCCATATCGAAGAGATGAAAAAAGCGGTGACTTACGATGGCGT... |
| 4 | AAAAAATCTGCCCGTGTCGT | gyrA | True | 2337231 | - | False | -0.840373 | -0.598858 | 1 | ATGACTGGAACAAAGCCTATAAAAAATCTGCCCGTGTCGTTGGTGA... |
g=GenomeBrowser(fasta_path=fasta_path, gff_path=gff_path, bounds=(0,100000), search=False, show_seq=False)
track=g.add_track(height=150)
track.scatter(data=cui2018data,pos="pos",y="fit75",factors="ori", hover_data=["guide"])
track2=g.add_track(height=150)
track2.scatter(data=cui2018data,pos="pos",y="fit18",factors="ori")
g.show()
Track.bar
Track.bar (data:pandas.core.frame.DataFrame, pos:str, y:str, factors:str=None, hover_data:List=None, **kwargs)
| | Type | Default | Details |
|---|---|---|---|
| data | DataFrame | | pandas DataFrame containing the data |
| pos | str | | name of the column containing the positions along the genome |
| y | str | | name of the column containing the data to be plotted on the y-axis |
| factors | str | None | name of a column of values to be used as factors |
| hover_data | List | None | list of additional column names to be shown when hovering over the data |
| kwargs | VAR_KEYWORD | | |
Additional kwargs are passed as is to bokeh.plotting.figure.vbar
Showing the same data as vertical bars
g=GenomeBrowser(fasta_path=fasta_path, gff_path=gff_path, bounds=(0,100000))
track=g.add_track()
track.bar(data=cui2018data,pos="pos",y="fit75",factors="ori")
g.show()
Track.custom
Track.custom (func:Callable=None)
| | Type | Default | Details |
|---|---|---|---|
| func | Callable | None | function to be called. First argument is the figure |
g=GenomeBrowser(gff_path=gff_path, bounds=(0,100000), search=False)
track = g.add_track()
x= np.arange(0,100000,100)
y= np.random.randint(0,10,size=x.shape)
track.custom(lambda fig: fig.scatter(x=x,y=y))
g.show()
Track.highlight
Track.highlight (data:pandas.core.frame.DataFrame=None, left_col:str='left', right_col:str='right', color_col:str='color', alpha_col:str='alpha', left=None, right=None, color='green', alpha:str=0.2, hover_data:List[str]=None, **kwargs)
| | Type | Default | Details |
|---|---|---|---|
| data | DataFrame | None | pandas DataFrame containing the data |
| left_col | str | left | name of the column containing the start positions of the regions |
| right_col | str | right | name of the column containing the end positions of the regions |
| color_col | str | color | name of the column containing color of the regions |
| alpha_col | str | alpha | name of the column containing alpha of the regions |
| left | NoneType | None | |
| right | NoneType | None | |
| color | str | green | |
| alpha | str | 0.2 | transparency |
| hover_data | List | None | list of additional column names to be shown when hovering over the data |
| kwargs | VAR_KEYWORD | | |
Additional kwargs are passed as is to bokeh.models.Rect
g=GenomeBrowser(fasta_path=fasta_path, gff_path=gff_path, bounds=(0,10000), search=False)
track=g.add_track()
track.scatter(data=cui2018data,pos="pos",y="fit75",factors="ori")
highlight_regions=pd.DataFrame({"left": [5000, 8000], "right": [6000, 8500], "color": ["red","green"]})
track.highlight(data=highlight_regions, left_col="left", right_col="right", color_col="color")
g.show()
g=GenomeBrowser(fasta_path=fasta_path, gff_path=gff_path, bounds=(0,10000), search=False)
track=g.add_track()
track.scatter(data=cui2018data,pos="pos",y="fit75",factors="ori")
highlight_regions=pd.DataFrame({"left": [5000, 8000], "right": [6000, 8500], "color": ["red","green"]})
g.highlight(data=highlight_regions, left="left", right="right", color="color", highlight_tracks=True)
g.show()