import genomenotebook as gn
import os
Options
Stack overlapping features vertically using z_stack=True
# Build the path to the example GFF3 annotation inside the example data directory.
data_path = gn.get_example_data_dir()
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

g = gn.GenomeBrowser(
    gff_path,
    search=False,
    z_stack=True,  # stack overlapping features vertically
    bounds=(220000, 250000),  # specifying bounds avoids loading the whole genome in memory
)
g.show()
Selecting feature types and attributes
You can select which feature types to display. The default feature types are `["CDS", "repeat_region", "ncRNA", "rRNA", "tRNA"]`.
You can inspect the GFF file to see which feature types and attributes are available. The `parse_gff` or `parse_genbank` functions can conveniently be used for this purpose. You can also use the `inspect_feature_types` function to output a table of feature types and attributes.
# Choosing the feature types and attributes to display
g = gn.GenomeBrowser(
    gff_path,
    feature_types=["tRNA", "rRNA"],
    attributes=["gene", "locus_tag", "product", "gbkey"],  # will be displayed when hovering
    bounds=(220000, 230000),  # specifying bounds avoids loading the whole genome in memory
    init_pos=226000,
)
g.show()
Changing colors
Modifying features by type
The glyph shown for each feature type is defined through a `Glyph` object. A custom glyphs dictionary can be passed to `GenomeBrowser` to customize the glyphs shown for different features.
# Fetch the default glyphs mapping and display the entry used for CDS features.
glyphs = gn.get_default_glyphs()
glyphs['CDS']
Glyph object with attributes:
glyph_type: arrow
colors: ('purple', 'orange')
height: 1
alpha: 0.8
show_name: True
name_attr: gene
# Display the default glyph used for repeat_region features.
glyphs['repeat_region']
Glyph object with attributes:
glyph_type: box
colors: ('grey',)
height: 0.8
alpha: 1
show_name: False
name_attr: gene
Modifying the default glyphs
glyphs["CDS"].colors = ('blue', 'green')  # two colors can be specified, one for each orientation.

# Pass the modified glyphs mapping to the browser so the new CDS colors are used.
g = gn.GenomeBrowser(gff_path, glyphs=glyphs, init_pos=224000, bounds=(220000, 230000), search=False)
g.show()
Defining a new Glyph from scratch
# Replace the rRNA entry with a glyph built from scratch.
glyphs["rRNA"] = gn.Glyph(
    glyph_type="box",
    colors="red",
    height=0.5,
    show_name=True,
)

g = gn.GenomeBrowser(
    gff_path,
    glyphs=glyphs,
    feature_name={'rRNA': "locus_tag"},  # per-feature-type attribute used as the displayed name
    init_pos=224000,
    bounds=(220000, 230000),
    search=False,
)
g.show()

# Inspect the glyph the browser ended up with for rRNA features.
g.glyphs["rRNA"]
Glyph object with attributes:
glyph_type: box
colors: ('red',)
height: 0.5
alpha: 0.8
show_name: True
name_attr: locus_tag
Modifying specific features
You can also access a DataFrame with the characteristics of each feature plotted on the genome browser. These can be modified as you wish to customize your plot.
# Preview the first rows of the patches DataFrame (one row per plotted feature).
g.patches.head()
names | xs | ys | xbox_min | color | alpha | pos | attributes | type | label_y | label_x | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | metQ | (220928, 220928, 220213, 220113, 220213) | (0.05, 0.2, 0.2, 0.125, 0.05) | 220213 | green | 0.8 | 220520.5 | <span style="color:FireBrick">CDS</span><br><s... | CDS | 0.2300 | 220520.5 |
1 | metI | (221621, 221621, 221068, 220968, 221068) | (0.05, 0.2, 0.2, 0.125, 0.05) | 221068 | green | 0.8 | 221294.5 | <span style="color:FireBrick">CDS</span><br><s... | CDS | 0.2300 | 221294.5 |
2 | metN | (222645, 222645, 221714, 221614, 221714) | (0.05, 0.2, 0.2, 0.125, 0.05) | 221714 | green | 0.8 | 222129.5 | <span style="color:FireBrick">CDS</span><br><s... | CDS | 0.2300 | 222129.5 |
3 | gmhB | (222833, 222833, 223308, 223408, 223308) | (0.05, 0.2, 0.2, 0.125, 0.05) | 222833 | blue | 0.8 | 223120.5 | <span style="color:FireBrick">CDS</span><br><s... | CDS | 0.2300 | 223120.5 |
4 | b0201 | (223771, 223771, 225312, 225312) | (0.0875, 0.1625, 0.1625, 0.0875) | 223771 | red | 0.8 | 224541.5 | <span style="color:FireBrick">rRNA</span><br><... | rRNA | 0.2675 | 224541.5 |
g = gn.GenomeBrowser(gff_path, init_pos=224000, bounds=(220000, 230000), search=False)

# Recolor only the feature named "metN" by editing its row in the patches DataFrame.
g.patches.loc[g.patches.names == "metN", "color"] = "green"
g.show()
Customizing labels
Changing the attribute used as the feature name
You can choose which attribute of the GFF file should be displayed on top of the gene. The `feature_name` needs to belong to the list of attributes. The default list of attributes is `["locus_tag", "gene", "product"]`.
# Use the protein_id attribute as the name for the features;
# it is also listed in `attributes`, as feature_name must belong to that list.
g = gn.GenomeBrowser(
    gff_path,
    attributes=["protein_id", 'gene', 'product'],
    feature_name="protein_id",
    bounds=(20000, 30000),
    search=False,
)
g.show()
You can also provide a dictionary to use different names for different feature types
# A dictionary maps each feature type to the attribute used as its name.
g = gn.GenomeBrowser(
    gff_path,
    attributes=["protein_id", 'gene', 'product', 'ID'],
    feature_types=["rRNA", "CDS"],
    feature_name={"CDS": "gene", 'rRNA': 'ID'},
    bounds=(220000, 230000),
    search=False,
)

g.show()
Changing the name of specific features
You can also modify the names of specific features directly in the patches DataFrame
# Preview the first rows of the patches DataFrame; the `names` column holds the feature names.
g.patches.head()
names | xs | ys | xbox_min | color | alpha | pos | attributes | type | label_y | label_x | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | metQ | (220928, 220928, 220213, 220113, 220213) | (0.05, 0.2, 0.2, 0.125, 0.05) | 220213 | orange | 0.8 | 220520.5 | <span style="color:FireBrick">CDS</span><br><s... | CDS | 0.23 | 220520.5 |
1 | metI | (221621, 221621, 221068, 220968, 221068) | (0.05, 0.2, 0.2, 0.125, 0.05) | 221068 | orange | 0.8 | 221294.5 | <span style="color:FireBrick">CDS</span><br><s... | CDS | 0.23 | 221294.5 |
2 | metN | (222645, 222645, 221714, 221614, 221714) | (0.05, 0.2, 0.2, 0.125, 0.05) | 221714 | orange | 0.8 | 222129.5 | <span style="color:FireBrick">CDS</span><br><s... | CDS | 0.23 | 222129.5 |
3 | gmhB | (222833, 222833, 223308, 223408, 223308) | (0.05, 0.2, 0.2, 0.125, 0.05) | 222833 | purple | 0.8 | 223120.5 | <span style="color:FireBrick">CDS</span><br><s... | CDS | 0.23 | 223120.5 |
4 | rna-b0201 | (223771, 223771, 225212, 225312, 225212) | (0.05, 0.2, 0.2, 0.125, 0.05) | 223771 | purple | 0.8 | 224541.5 | <span style="color:FireBrick">rRNA</span><br><... | rRNA | 0.23 | 224541.5 |
# Recolor first, then rename: both selections match on the current name "metN",
# so the rename has to come last.
g.patches.loc[g.patches.names == "metN", "color"] = "green"
g.patches.loc[g.patches.names == "metN", "names"] = "custom"
g.show()
Changing label angle and size
g = gn.GenomeBrowser(
    gff_path,
    label_angle=0,
    label_font_size="10pt",
    height=200,
    feature_height=0.2,  # fraction of the annotation track occupied by the features
    bounds=(20000, 30000),
    attributes=["gene", "locus_tag"],
    search=False,
    z_stack=True,  # z_stack will cause overlapping features to shift so that they are distinct on the y-axis
)
g.show()
Adding information to be displayed when hovering
Data can be added to the tooltip that appears when hovering. Make sure that the length of the values list equals the number of patches.
import numpy as np  # needed for the random example values below

g = gn.GenomeBrowser(gff_path=gff_path, bounds=(0, 10000), attributes=["gene", "product", "locus_tag"])

# One value per patch: the list length must equal the number of patches.
values = np.random.randint(0, 10, len(g.patches))
g.add_tooltip_data("data", values)
g.show()
Toolbar location
The location of the toolbar can be changed for each track independently. Placing it above or below can help see all the Bokeh tools available. Valid values are:
* "above"
* "below"
* "left"
* "right"

See the Bokeh documentation for more details.
import numpy as np  # needed for np.arange / np.sin below
import pandas as pd

# Build the path to the example GFF3 annotation inside the example data directory.
data_path = gn.get_example_data_dir()
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

# Example signal: one sine value every 100 positions over the displayed range.
data = pd.DataFrame(dict(x=np.arange(0, 50000, 100),
                         y=np.sin(np.arange(0, 50000, 100))))

g = gn.GenomeBrowser(
    gff_path=gff_path,
    bounds=(0, 50000),
    toolbar_location="above",
    attributes=["gene", "product"],
    search=False,
)

# The added track gets its own toolbar location, set independently of the browser's.
track = g.add_track(height=200,
                    toolbar_location="above")

track.bar(data=data, pos="x", y="y")
g.show()