browser

Contains the GenomeBrowser and GenomeStack classes

GenomeBrowser

 GenomeBrowser (gff_path:str=None, fasta_path:str=None, gb_path:str=None,
                seq_id:str=None, init_pos:int=None, init_win:int=10000,
                bounds:tuple=None, max_interval:int=100000,
                show_seq:bool=True, search:bool=True,
                attributes:Union[list,Dict[str,Optional[list]]]=None,
                feature_name:Union[str,Dict[str,str],NoneType]=None,
                feature_types:list=None, glyphs:dict=None, height:int=150,
                width:int=600, label_angle:int=45,
                label_font_size:str='10pt', label_justify:str='center',
                label_vertical_offset:float=0.03,
                label_horizontal_offset:float=-5, show_labels:bool=True,
                feature_height:float=0.15,
                features:pandas.core.frame.DataFrame=None,
                seq:Bio.Seq.Seq=None, color_attribute:str=None,
                z_stack:bool=False, **kwargs)

Initialize a GenomeBrowser object.

	Type	Default	Details
gff_path	str	None	path to the gff3 file of the annotations (also accepts gzip files)
fasta_path	str	None	path to the fasta file of the genome sequence
gb_path	str	None	path to a genbank file
seq_id	str	None	id of the sequence to load (for genomes with multiple contigs); defaults to the first sequence in the GenBank or GFF file.
init_pos	int	None	initial position to display
init_win	int	10000	initial window size (max=20000)
bounds	tuple	None	bounds can be specified. This helps preserve memory by not loading the whole genome if not needed.
max_interval	int	100000	maximum size of the field of view in bp
show_seq	bool	True	creates an HTML div that shows the sequence when zooming in
search	bool	True	enables a search bar
attributes	Union	None	list of attribute names from the GFF attributes column to be extracted. If dict then keys are feature types and values are lists of attributes. If None, then all attributes will be used.
feature_name	Union	None	attribute to be displayed as the feature name. If str then use the same field for every feature type. If dict then keys are feature types and values are feature name attribute.
feature_types	list	None	list of feature types to display
glyphs	dict	None	dictionary defining the type and color of glyphs to display for each feature type
height	int	150	height of the annotation track
width	int	600	width of the inner frame of the browser
label_angle	int	45	angle of the feature names displayed on top of the features
label_font_size	str	10pt	font size of the feature names
label_justify	str	center	center, left
label_vertical_offset	float	0.03	how far above a feature to draw the label
label_horizontal_offset	float	-5	how far to shift the feature label on the x-axis
show_labels	bool	True	if False, then don’t show feature labels
feature_height	float	0.15	fraction of the annotation track height occupied by the features
features	DataFrame	None	DataFrame with columns: [“seq_id”, “source”, “type”, “start”, “end”, “score”, “strand”, “phase”, “attributes”], where “attributes” is a dict of attributes.
seq	Seq	None	keeps the Biopython sequence object
color_attribute	str	None	feature attribute to be used as patch color
z_stack	bool	False	if True, features that overlap will be stacked on top of each other
kwargs

Additional keyword arguments are passed as is to bokeh.plotting.figure

Upon initialization, a GenomeBrowser object parses the data and creates the GenomeBrowser.patches pandas DataFrame that contains the data to be plotted.

from genomenotebook.data import get_example_data_dir
import os

data_path = get_example_data_dir()
fasta_path = os.path.join(data_path, "MG1655_U00096.fasta")
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

g=GenomeBrowser(gff_path=gff_path, fasta_path=fasta_path, bounds=(0,50000))
print(g.seq_id, g.seq[:10])
g.patches.head()

U00096.3 AGCTTTTCAT

	names	xs	ys	xbox_min	color	alpha	pos	attributes	type	label_y	label_x
0	thrL	(190, 190, 190, 255, 190)	(0.05, 0.2, 0.2, 0.125, 0.05)	190	purple	0.8	222.5	<span style="color:FireBrick">CDS</span><br><s...	CDS	0.23	222.5
1	thrA	(337, 337, 2699, 2799, 2699)	(0.05, 0.2, 0.2, 0.125, 0.05)	337	purple	0.8	1568.0	<span style="color:FireBrick">CDS</span><br><s...	CDS	0.23	1568.0
2	thrB	(2801, 2801, 3633, 3733, 3633)	(0.05, 0.2, 0.2, 0.125, 0.05)	2801	purple	0.8	3267.0	<span style="color:FireBrick">CDS</span><br><s...	CDS	0.23	3267.0
3	thrC	(3734, 3734, 4920, 5020, 4920)	(0.05, 0.2, 0.2, 0.125, 0.05)	3734	purple	0.8	4377.0	<span style="color:FireBrick">CDS</span><br><s...	CDS	0.23	4377.0
4	yaaX	(5234, 5234, 5430, 5530, 5430)	(0.05, 0.2, 0.2, 0.125, 0.05)	5234	purple	0.8	5382.0	<span style="color:FireBrick">CDS</span><br><s...	CDS	0.23	5382.0

source

GenomeBrowser.show

 GenomeBrowser.show ()

Shows the plot in an interactive Jupyter notebook

#GFF + FASTA input
g=GenomeBrowser(fasta_path=fasta_path, gff_path=gff_path, bounds=(0,50000),width=600)
g.show()

#GenBank input
gb_path=os.path.join(data_path, "colored_genbank.gb")
g=GenomeBrowser(gb_path=gb_path)
g.show()

#Providing GFF file as the only input
g=GenomeBrowser(gff_path)
g.show()

source

GenomeBrowser.add_track

 GenomeBrowser.add_track (height:int=200, tools:str='xwheel_zoom,
                          ywheel_zoom, pan, box_zoom, save, reset',
                          **kwargs)

Adds a track to the GenomeBrowser. Ensures that the x_range is shared and that the figure widths are identical.

	Type	Default	Details
height	int	200	size of the track
tools	str	xwheel_zoom, ywheel_zoom, pan, box_zoom, save, reset	comma separated list of Bokeh tools that can be used to navigate the plot
kwargs
Returns	Track

fasta_path = os.path.join(data_path, "MG1655_U00096.fasta")
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

data=pd.DataFrame(dict(x=np.arange(0,50000,100),
                       y=np.sin(np.arange(0,50000,100))))

g=GenomeBrowser(fasta_path=fasta_path, gff_path=gff_path, bounds=(0,5000), search=False, show_seq=False)

track = g.add_track(height=100)
track.scatter(data=data,pos="x",y="y")
g.show()

source

GenomeBrowser.highlight

 GenomeBrowser.highlight (data:pandas.core.frame.DataFrame=None,
                          left_col:str='left', right_col:str='right',
                          color_col:str='color', alpha_col:str='alpha',
                          left=None, right=None, color='green',
                          alpha:str=0.2, hover_data:List=None, **kwargs)

	Type	Default	Details
data	DataFrame	None	pandas DataFrame containing the data
left_col	str	left	name of the column containing the start positions of the regions
right_col	str	right	name of the column containing the end positions of the regions
color_col	str	color	name of the column containing color of the regions
alpha_col	str	alpha	name of the column containing alpha (transparency) of the regions
left	NoneType	None
right	NoneType	None
color	str	green
alpha	str	0.2	transparency
hover_data	List	None	list of additional column names to be shown when hovering over the data
kwargs

import pandas as pd

highlight_regions=pd.DataFrame({"start": [5000, 8000], "stop": [6000, 8500], "color": ["red","green"], "y":[23, 45]})

g=GenomeBrowser(gff_path=gff_path, fasta_path=fasta_path, bounds=(0,10000))
g.highlight(data=highlight_regions, left_col="start", right_col="stop", hover_data=["y"])
g.show()

data=pd.DataFrame(dict(x=np.arange(0,50000,100),
                       y=np.sin(np.arange(0,50000,100))))

g=GenomeBrowser(fasta_path=fasta_path, gff_path=gff_path, bounds=(0,5000), search=False, show_seq=False)
track = g.add_track(height=100)
track.scatter(data=data,pos="x",y="y")

highlight_regions=pd.DataFrame({"start": [2000, 4000], "stop": [3000, 4500], "color": ["red","green"], "y":[23, 45]})
g.highlight(data=highlight_regions, left_col="start", right_col="stop", hover_data=["y"], highlight_tracks=True)

g.show()

source

GenomeBrowser.add_tooltip_data

 GenomeBrowser.add_tooltip_data (name:str, values:str,
                                 feature_type:str=None)

	Type	Default	Details
name	str		name of the data to be added
values	str		values
feature_type	str	None	specify the feature type if the data applies only to a specific feature_type

g=GenomeBrowser(gff_path=gff_path, attributes=["gene", "locus_tag"],bounds=(0,10000))
data=np.random.randint(0,10,len(g.patches))
g.add_tooltip_data("data",data)
g.show()

source

GenomeBrowser.save

 GenomeBrowser.save (fname:str, title:str='Genome Plot')

Saves the plot in svg or png. This function saves the initial plot that is generated and not the current view of the browser. To save in svg format you must initialize your GenomeBrowser using output_backend="svg".

	Type	Default	Details
fname	str		file name (must end in .svg or .png). If using svg, GenomeBrowser needs to be initialized with `output_backend="svg"`
title	str	Genome Plot	plot title

Saving to svg

g=GenomeBrowser(gff_path=gff_path, 
                bounds=(0,5000),
                search=False)
track = g.add_track(height=100)
track.scatter(data=pd.DataFrame(dict(x=np.arange(0,5000,100),y=np.sin(np.arange(0,5000,100)))), y="y", pos="x")
g.save("test.svg")
g.show()

Saving to png

g=GenomeBrowser(fasta_path=fasta_path, 
                gff_path=gff_path,
                bounds=(0,5000),
                search=False,
                height=200,
                width=2000,
                label_font_size="20pt")
g.save("test.png")

source

GenomeStack

 GenomeStack (browsers=None)

Initialize self. See help(type(self)) for accurate signature.

data_path = get_example_data_dir()
gb_path = os.path.join(data_path, "colored_genbank.gb")
g = GenomeStack.from_genbank(gb_path,
                                width=700, 
                                show_seq=True, 
                                search=False, 
                                feature_types=["CDS", "Domainator"], 
                                color_attribute="Color", 
                                label_angle=0, 
                                show_labels=True, 
                                feature_height=0.15, 
                                label_vertical_offset=-0.15, 
                                label_justify="left", 
                                label_horizontal_offset = 5, 
                                glyphs=get_default_glyphs(box_colors=("Cyan",)), 
                                feature_name={"CDS":"gene_id","Domainator":"name"},
                                z_stack=True) 
g.show()