Examples

Simple browser

Create a simple genome browser with a search bar. The sequence appears when zooming in.

import genomenotebook as gn
import os

# Locate the example E. coli genome files shipped with the package
data_path = gn.get_example_data_dir()
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")
fasta_path = os.path.join(data_path, "MG1655_U00096.fasta")

# Applying bounds avoids loading the whole genome in memory
# if you don't need it
g = gn.GenomeBrowser(
    gff_path=gff_path,
    fasta_path=fasta_path,
    attributes=["gene", "locus_tag", "product"],
    init_pos=50000,
    bounds=(0, 100000),
)
g.show()

Adding a data track

GenomeNotebook provides three types of pre-built tracks: `Track.line`, `Track.scatter` and `Track.bar`.

A pandas DataFrame is passed as the source of the data. See the example usage below.

Plotting some ChIP-seq data with `Track.line`

import pyBigWig
import pandas as pd
import numpy as np

# Browser limited to the 60-100 kb window, with `search` and `show_seq` turned off
g = gn.GenomeBrowser(
    gff_path=gff_path,
    fasta_path=fasta_path,
    bounds=(60000, 100000),
    init_pos=82000,
    attributes=["gene", "locus_tag", "product"],
    search=False,
    show_seq=False,
)

# Import some coverage data from a BigWig file
refname = "NC_000913"
bw_file_path = os.path.join(data_path, "ChIP-ACCCA-1.bw")
with pyBigWig.open(bw_file_path) as bw:
    cov = bw.values(refname, 0, g.seq_len, numpy=True)

# Keep one coverage value every `step` positions
step = 10
positions = np.arange(0, g.seq_len, step)
data = pd.DataFrame({"pos": positions, "cov": cov[::step]})

track = g.add_track()
# Check out the Bokeh documentation for other keyword arguments you can pass
track.line(data, pos="pos", y="cov", line_color="blue", line_width=2)
g.show()

Note that pyBigWig cannot be installed on Windows. If you are a Windows user, you can still make this work by running genomenotebook in WSL.

Plotting some CRISPR screening data with `Track.scatter` and `Track.bar`

# Load the Cui 2018 CRISPRi screen data straight from its public URL
cui2018_url = "https://gitlab.pasteur.fr/dbikard/badSeed_public/raw/master/screen_data.csv"
cui2018data = pd.read_csv(cui2018_url)
cui2018data.head()

	guide	gene	essential	pos	ori	coding	fit18	fit75	ntargets	seq
0	AAAAAACCTGCTGGTGAGGC	NaN	NaN	2202483	-	NaN	-4.850012	-1.437546	1	AAAGCAGATCACAGTAAATAAAAAAACCTGCTGGTGAGGCAGGTTC...
1	AAAAAACGTATTCGCTTGCA	curA	False	1517891	+	False	-0.094026	-0.100313	1	TGTTGATGGCTACAGTGCTGAAAAAACGTATTCGCTTGCAAGGTTT...
2	AAAAAAGCGCACTTTTTGAC	NaN	NaN	1919717	+	NaN	-1.109310	-0.246740	1	GTAACGCCTGACAGCGCACAAAAAAAGCGCACTTTTTGACTGGCAC...
3	AAAAAAGCGGTGACTTACGA	bglA	False	3042929	+	False	-1.328831	-0.905068	1	GCGCCCATATCGAAGAGATGAAAAAAGCGGTGACTTACGATGGCGT...
4	AAAAAATCTGCCCGTGTCGT	gyrA	True	2337231	-	False	-0.840373	-0.598858	1	ATGACTGGAACAAAGCCTATAAAAAATCTGCCCGTGTCGTTGGTGA...

g = gn.GenomeBrowser(
    gff_path=gff_path,
    fasta_path=fasta_path,
    bounds=(70000, 110000),
    attributes=["gene", "locus_tag", "product"],
    search=False,
    show_seq=False,
)

# First track: scatter plot of the `fit75` column
scatter_track = g.add_track(height=100)
scatter_track.scatter(
    data=cui2018data,
    pos="pos",
    y="fit75",
    factors="ori",
    hover_data=["fit18", "guide"],
)

# Second track: bar plot of the `fit18` column
bar_track = g.add_track(height=100)
bar_track.bar(
    data=cui2018data,
    pos="pos",
    y="fit18",
    factors="ori",
    hover_data=["fit75", "guide"],
)
g.show()

Custom tracks

GenomeNotebook uses the Bokeh library. In the example below, `track.custom` lets you supply a function that takes a simple Bokeh figure as an argument, on which you can plot anything you want using Bokeh. As a demonstration, we add a track with random points drawn directly on the Bokeh figure.

g = gn.GenomeBrowser(
    gff_path=gff_path,
    bounds=(0, 100000),
    attributes=["gene", "locus_tag", "product"],
    search=False,
)
track = g.add_track()

# One random integer in [0, 10) for every 100th position
x = np.arange(0, 100000, 100)
y = np.random.randint(0, 10, size=x.shape)


def plot_random_points(fig):
    """Draw the random points on the Bokeh figure supplied by the track."""
    return fig.scatter(x=x, y=y)


track.custom(plot_random_points)
g.show()

Highlighting regions

You can specify regions to highlight on the annotation track using the highlight function.

g = gn.GenomeBrowser(gff_path=gff_path, bounds=(0, 10000))

# One row per region to highlight
highlight_regions = pd.DataFrame(
    {
        "left": [5000, 8000],
        "right": [6000, 8500],
        "color": ["red", "green"],
        "y": [23, 45],
    }
)
highlight_regions

	left	right	color	y
0	5000	6000	red	23
1	8000	8500	green	45

# Highlight the regions on the annotation track; the "y" column is passed
# as hover data (presumably shown in the hover tooltip)
g.highlight(
    data=highlight_regions,
    hover_data=["y"],
)
g.show()

Working with multiple chromosomes / contigs

If your GFF file contains several chromosomes or contigs, the sequence id of the contig you want to display can be specified using the `seq_id` argument. You can also display multiple contigs with synchronized panning using `genomenotebook.GenomeStack`; see Multi genbank and stacking.

import itertools
from Bio import SeqIO

gff_path = os.path.join(data_path, "jmh43.gff")
fasta_path = os.path.join(data_path, "jmh43.fna")

# Show one browser for each of the first three records of the FASTA file
first_records = itertools.islice(SeqIO.parse(fasta_path, "fasta"), 3)
for record in first_records:
    print(record.id)
    g = gn.GenomeBrowser(
        gff_path=gff_path,
        fasta_path=fasta_path,
        seq_id=record.id,
        attributes=["gene", "locus_tag", "product"],
        feature_name="locus_tag",
        search=False,
    )
    g.show()

NZ_JAGURL010000100.1

NZ_JAGURL010000101.1

NZ_JAGURL010000102.1