browser

Contains the GenomeBrowser class

source

GenomeBrowser

 GenomeBrowser (gff_path:str, genome_path:str=None, seq_id:str=None,
                init_pos:int=None, init_win:int=10000, bounds:tuple=None,
                max_interval:int=100000, show_seq:bool=True,
                search:bool=True, attributes:list=['gene', 'locus_tag',
                'product'], feature_name:str='gene',
                feature_types:list=['CDS', 'repeat_region', 'ncRNA',
                'rRNA', 'tRNA'], glyphs:dict=None, height:int=150,
                width:int=600, label_angle:int=45,
                label_font_size:str='10pt', feature_height:float=0.15,
                output_backend:str='webgl', **kwargs)

Initialize a GenomeBrowser object.

Type Default Details
gff_path str path to the gff3 file of the annotations (also accepts gzip files)
genome_path str None path to the fasta file of the genome sequence
seq_id str None id of the sequence to show for genomes with multiple contigs
init_pos int None initial position to display
init_win int 10000 initial window size (max=20000)
bounds tuple None bounds can be specified. This helps preserve memory by not loading the whole genome if not needed.
max_interval int 100000 maximum size of the field of view in bp
show_seq bool True shows the sequence when zooming in
search bool True enables a search bar
attributes list [‘gene’, ‘locus_tag’, ‘product’] list of attribute names from the GFF attributes column to be extracted
feature_name str gene attribute to be displayed as the feature name
feature_types list [‘CDS’, ‘repeat_region’, ‘ncRNA’, ‘rRNA’, ‘tRNA’] list of feature types to display
glyphs dict None dictionary defining the type and color of glyphs to display for each feature type
height int 150 height of the annotation track
width int 600 width of the inner frame of the browser
label_angle int 45 angle of the feature names displayed on top of the features
label_font_size str 10pt font size of the feature names
feature_height float 0.15 fraction of the annotation track height occupied by the features
output_backend str webgl can be “webgl” or “svg”. webgl is more efficient, but svg is a vector format that can be conveniently edited with other software
kwargs

Additional keyword arguments are passed as is to bokeh.plotting.figure

from genomenotebook.data import get_example_data_dir
import os
data_path = get_example_data_dir()
genome_path = os.path.join(data_path, "MG1655_U00096.fasta")
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,50000),width=600)
g.show()
#Providing GFF file as only input
g=GenomeBrowser(gff_path)
g.show()
#List available attributes
from genomenotebook.utils import available_attributes, available_feature_types
available_attributes(gff_path)
Index(['seq_id', 'source', 'type', 'start', 'end', 'score', 'strand', 'phase',
       'attributes', 'Name', 'mobile_element_type', 'Is_circular',
       'recombination_class', 'gbkey', 'protein_id', 'exception', 'pseudo',
       'gene_synonym', 'orig_transcript_id', 'strain', 'part', 'gene',
       'mol_type', 'transl_except', 'ID', 'substrain', 'genome', 'rpt_type',
       'Note', 'Dbxref', 'product', 'transl_table', 'orig_protein_id',
       'locus_tag', 'Parent', 'gene_biotype', 'left', 'right', 'middle'],
      dtype='object')
#Showing different attributes from the GFF file
g=GenomeBrowser(gff_path, attributes=["locus_tag","protein_id",'gene','product'],feature_name="protein_id")
g.show()

source

GenomeBrowser.add_track

 GenomeBrowser.add_track (height:int=200, tools:str='xwheel_zoom,
                          ywheel_zoom, pan, box_zoom, save, reset',
                          **kwargs)

Adds a track to the GenomeBrowser. Ensures that the x_range is shared and that figure widths are identical.

Type Default Details
height int 200 size of the track
tools str xwheel_zoom, ywheel_zoom, pan, box_zoom, save, reset comma separated list of Bokeh tools that can be used to navigate the plot
kwargs
Returns Track
data_path = get_example_data_dir()
genome_path = os.path.join(data_path, "MG1655_U00096.fasta")
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

data=pd.DataFrame(dict(x=np.arange(0,50000,100),
                       y=np.sin(np.arange(0,50000,100))))

g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,5000), search=False, show_seq=False)

track = g.add_track(height=100)
track.scatter(data=data,pos="x",y="y")
g.show()

source

GenomeBrowser.highlight

 GenomeBrowser.highlight (data:pandas.core.frame.DataFrame,
                          left:str='left', right:str='right',
                          color:str='color', alpha:str=0.2,
                          hover_data:list=[], highlight_tracks:bool=False,
                          **kwargs)
Type Default Details
data DataFrame pandas DataFrame containing the data
left str left name of the column containing the start positions of the regions
right str right name of the column containing the end positions of the regions
color str color color of the regions
alpha str 0.2 transparency
hover_data list [] list of additional column names to be shown when hovering over the data
highlight_tracks bool False whether to highlight just the annotation track or also the other tracks
kwargs
import pandas as pd
g=GenomeBrowser(gff_path=gff_path, genome_path=genome_path, bounds=(0,10000))
highlight_regions=pd.DataFrame({"start": [5000, 8000], "stop": [6000, 8500], "color": ["red","green"], "y":[23, 45]})
g.highlight(data=highlight_regions, left="start", right="stop", hover_data=["y"])
g.show()
data=pd.DataFrame(dict(x=np.arange(0,50000,100),
                       y=np.sin(np.arange(0,50000,100))))

g=GenomeBrowser(genome_path=genome_path, gff_path=gff_path, bounds=(0,5000), search=False, show_seq=False)
track = g.add_track(height=100)
track.scatter(data=data,pos="x",y="y")

highlight_regions=pd.DataFrame({"start": [2000, 4000], "stop": [3000, 4500], "color": ["red","green"], "y":[23, 45]})
g.highlight(data=highlight_regions, left="start", right="stop", hover_data=["y"], highlight_tracks=True)

g.show()

source

GenomeBrowser.save

 GenomeBrowser.save (fname:str)

This function saves the initial plot that is generated, not the current view of the browser. To save in svg format you must initialise your GenomeBrowser using output_backend="svg".

Type Details
fname str path to file or a simple name (extensions are automatically added)

Saving to svg

Plots can only be saved to svg if you initialise your GenomeBrowser using output_backend="svg"

g=GenomeBrowser(gff_path=gff_path, 
                bounds=(0,5000),
                output_backend="svg",
                search=False)
track = g.add_track(height=100)
track.fig.scatter(x=np.arange(0,5000,100),y=np.sin(np.arange(0,5000,100)))
g.show()
g.save("test.svg")

Saving to png

g=GenomeBrowser(genome_path=genome_path, 
                gff_path=gff_path,
                bounds=(0,5000),
                search=False,
                height=200,
                width=2000,
                label_font_size="20pt")
g.save("test.png")