import genomenotebook as gn
import os
Options
Selecting feature types and attributes
You can select which feature types to display. The default feature types are ["CDS", "repeat_region", "ncRNA", "rRNA", "tRNA"].
You can inspect the gff file to see what feature types and attributes are available. The genomenotebook.parse_gff function can conveniently be used for this purpose. It also accepts gzipped gff files.
# Build the path to the example GFF3 file in genomenotebook's example data directory.
data_path = gn.get_example_data_dir()
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

# Parse the GFF file into a table of features and preview the first rows.
features = gn.parse_gff(gff_path)
features.head()
seq_id | source | type | start | end | score | strand | phase | attributes | Parent | ... | substrain | part | mobile_element_type | gene_biotype | gbkey | rpt_type | Name | left | right | middle | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | U00096.3 | Genbank | region | 1 | 4641652 | . | + | . | ID=U00096.3:1..4641652;Dbxref=taxon:511145;Is_... | None | ... | MG1655 | None | None | None | Src | None | ANONYMOUS | 1 | 4641652 | 2320826.5 |
1 | U00096.3 | Genbank | gene | 190 | 255 | . | + | . | ID=gene-b0001;Dbxref=ASAP:ABE-0000006,ECOCYC:E... | None | ... | None | None | None | protein_coding | Gene | None | thrL | 190 | 255 | 222.5 |
2 | U00096.3 | Genbank | CDS | 190 | 255 | . | + | 0 | ID=cds-AAC73112.1;Parent=gene-b0001;Dbxref=Uni... | gene-b0001 | ... | None | None | None | None | CDS | None | AAC73112.1 | 190 | 255 | 222.5 |
3 | U00096.3 | Genbank | gene | 337 | 2799 | . | + | . | ID=gene-b0002;Dbxref=ASAP:ABE-0000008,ECOCYC:E... | None | ... | None | None | None | protein_coding | Gene | None | thrA | 337 | 2799 | 1568.0 |
4 | U00096.3 | Genbank | CDS | 337 | 2799 | . | + | 0 | ID=cds-AAC73113.1;Parent=gene-b0002;Dbxref=Uni... | gene-b0002 | ... | None | None | None | None | CDS | None | AAC73113.1 | 337 | 2799 | 1568.0 |
5 rows × 39 columns
# Distinct feature types found in the `type` column of the parsed GFF table
set(features["type"])
{'CDS',
'exon',
'gene',
'mobile_genetic_element',
'ncRNA',
'origin_of_replication',
'pseudogene',
'rRNA',
'recombination_feature',
'region',
'repeat_region',
'sequence_feature',
'tRNA'}
# Available attributes: the column names of the parsed features table
features.columns
Index(['seq_id', 'source', 'type', 'start', 'end', 'score', 'strand', 'phase',
'attributes', 'Parent', 'gene', 'transl_table', 'recombination_class',
'orig_protein_id', 'product', 'protein_id', 'transl_except', 'strain',
'gene_synonym', 'pseudo', 'exception', 'Note', 'orig_transcript_id',
'Dbxref', 'genome', 'ID', 'locus_tag', 'Is_circular', 'mol_type',
'substrain', 'part', 'mobile_element_type', 'gene_biotype', 'gbkey',
'rpt_type', 'Name', 'left', 'right', 'middle'],
dtype='object')
# Choosing the feature types, attributes and feature name to display
g = gn.GenomeBrowser(
    gff_path,
    feature_types=["tRNA", "rRNA"],
    attributes=["gene", "locus_tag", "strand", "start", "end"],  # will be displayed when hovering
    init_pos=226000,
)
g.show()
Changing colors
Modifying features by type
The glyph shown for each feature type is defined through a Glyph object. A custom glyphs dictionary can be passed to GenomeBrowser to customize the glyphs shown for different features.
# Fetch the default glyphs dictionary and inspect the Glyph used for CDS features.
glyphs = gn.get_default_glyphs()
glyphs['CDS']
Glyph object with attributes:
glyph_type: arrow
colors: ('purple', 'orange')
height: 1
alpha: 0.8
show_name: True
# Inspect the Glyph used for repeat_region features.
glyphs['repeat_region']
Glyph object with attributes:
glyph_type: box
colors: ('grey',)
height: 0.8
alpha: 1
show_name: False
Modifying the default glyphs
# Override the colors of the CDS glyph in the glyphs dictionary.
glyphs["CDS"].colors = ('blue', 'green')

# Pass the modified glyphs dictionary to the browser.
g = gn.GenomeBrowser(gff_path, glyphs=glyphs, init_pos=224000, bounds=(220000, 230000), search=False)
g.show()
Defining a new Glyph from scratch
# Replace the rRNA entry of the glyphs dictionary with a newly built Glyph.
glyphs["rRNA"] = gn.Glyph(
    glyph_type="box",
    colors="red",
    height=0.5,
    show_name=False,
)

g = gn.GenomeBrowser(gff_path, glyphs=glyphs, init_pos=224000, bounds=(220000, 230000), search=False)
g.show()
Modifying specific features
You can also access a DataFrame with the characteristics of each feature plotted on the genome browser. These can be modified as you wish to customize your plot.
# Preview the first rows of the DataFrame describing the plotted features.
g.patches.head()
names | xs | ys | color | alpha | pos | gene | locus_tag | product | |
---|---|---|---|---|---|---|---|---|---|
0 | metQ | (220928, 220928, 220213, 220113, 220213) | (0.05, 0.2, 0.2, 0.125, 0.05) | green | 0.8 | 220520.5 | metQ | b0197 | L-methionine/D-methionine ABC transporter memb... |
1 | metI | (221621, 221621, 221068, 220968, 221068) | (0.05, 0.2, 0.2, 0.125, 0.05) | green | 0.8 | 221294.5 | metI | b0198 | L-methionine/D-methionine ABC transporter memb... |
2 | metN | (222645, 222645, 221714, 221614, 221714) | (0.05, 0.2, 0.2, 0.125, 0.05) | green | 0.8 | 222129.5 | metN | b0199 | L-methionine/D-methionine ABC transporter ATP ... |
3 | gmhB | (222833, 222833, 223308, 223408, 223308) | (0.05, 0.2, 0.2, 0.125, 0.05) | blue | 0.8 | 223120.5 | gmhB | b0200 | D-glycero-beta-D-manno-heptose-1%2C7-bisphosph... |
4 | | (223771, 223771, 225312, 225312) | (0.0875, 0.1625, 0.1625, 0.0875) | red | 0.8 | 224541.5 | rrsH | b0201 | 16S ribosomal RNA |
g = gn.GenomeBrowser(gff_path, init_pos=224000, bounds=(220000, 230000), search=False)

# Recolor a single feature by editing its row in the patches DataFrame.
g.patches.loc[g.patches.names == "metN", "color"] = "green"
g.show()
Customizing labels
Changing the attribute used as the feature name
You can choose which attribute of the GFF file should be displayed on top of the gene. The feature_name needs to belong to the list of attributes. The default list of attributes is ["locus_tag", "gene", "product"].
# Use protein_id as the feature name; it is also listed in the attributes.
g = gn.GenomeBrowser(
    gff_path,
    attributes=["protein_id", 'gene', 'product'],
    feature_name="protein_id",
    bounds=(20000, 30000),
    search=False,
)
g.show()
Changing the name of specific features
You can also modify the names of specific features directly in the patches DataFrame.
# Preview the first rows of the patches DataFrame, including its `names` column.
g.patches.head()
names | xs | ys | color | alpha | pos | protein_id | gene | product | |
---|---|---|---|---|---|---|---|---|---|
0 | AAC73132.1 | (20314, 20314, 19911, 19811, 19911) | (0.05, 0.2, 0.2, 0.125, 0.05) | orange | 0.8 | 20062.5 | AAC73132.1 | insB-1 | IS1 protein InsB |
1 | AAC73133.1 | (20508, 20508, 20333, 20233, 20333) | (0.05, 0.2, 0.2, 0.125, 0.05) | orange | 0.8 | 20370.5 | AAC73133.1 | insA-1 | IS1 protein InsA |
2 | AAC73134.1 | (21078, 21078, 20915, 20815, 20915) | (0.05, 0.2, 0.2, 0.125, 0.05) | orange | 0.8 | 20946.5 | AAC73134.1 | rpsT | 30S ribosomal subunit protein S20 |
3 | AAC73135.1 | (21181, 21181, 21299, 21399, 21299) | (0.05, 0.2, 0.2, 0.125, 0.05) | purple | 0.8 | 21290.0 | AAC73135.1 | yaaY | DUF2575 domain-containing protein YaaY |
4 | AAC73136.1 | (21407, 21407, 22248, 22348, 22248) | (0.05, 0.2, 0.2, 0.125, 0.05) | purple | 0.8 | 21877.5 | AAC73136.1 | ribF | bifunctional riboflavin kinase/FMN adenylyltra... |
# Recolor and rename one feature by editing its row in the patches DataFrame.
g.patches.loc[g.patches.gene == "ribF", "color"] = "green"
g.patches.loc[g.patches.gene == "ribF", "names"] = "CUSTOM NAME"
g.show()
Changing label angle and size
# Adjust label angle and font size, track height, and feature height.
g = gn.GenomeBrowser(
    gff_path,
    label_angle=0,
    label_font_size="10pt",
    height=80,
    feature_height=0.4,  # fraction of the annotation track occupied by the features
    bounds=(20000, 30000),
    search=False,
)
g.show()
Adding information to be displayed when hovering
Any new column added to the patches DataFrame can be shown when hovering over features of the annotation track. The name of the column to display must also be included in the attributes list passed to GenomeBrowser.
import numpy as np  # needed for the random example values below

# "data" is listed in attributes so the new column is shown when hovering.
g = gn.GenomeBrowser(
    gff_path,
    init_pos=224000,
    bounds=(220000, 230000),
    attributes=["gene", "locus_tag", "data"],
    search=False,
)

# Add a new column to the patches DataFrame: one random integer in [0, 10) per row.
g.patches["data"] = np.random.randint(0, 10, len(g.patches))
g.show()
Toolbar location
The location of the toolbar can be changed for each track independently. Placing it above or below can help see all the Bokeh tools available. Valid values are:
* "above"
* "below"
* "left"
* "right"
See the Bokeh documentation for more details.
import numpy as np  # used for the example signal below
import pandas as pd

data_path = gn.get_example_data_dir()
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

# Example signal: a sine wave sampled every 100 positions over 0..50000.
positions = np.arange(0, 50000, 100)
data = pd.DataFrame(dict(x=positions, y=np.sin(positions)))

# Toolbar location set on the browser itself.
g = gn.GenomeBrowser(
    gff_path=gff_path,
    bounds=(0, 50000),
    toolbar_location="above",
    search=False,
)

# The added track takes its own toolbar_location.
track = g.add_track(
    height=200,
    toolbar_location="above",
)

track.bar(data=data, pos="x", y="y")
g.show()