import genomenotebook as gn
import os

Examples
Simple browser
Create a simple genome browser with a search bar. The sequence appears when zooming in.
# Locate the example E. coli genome files shipped with the package.
data_path = gn.get_example_data_dir()
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")
fasta_path = os.path.join(data_path, "MG1655_U00096.fasta")

# Applying bounds avoids loading the whole genome in memory if you don't need it.
g = gn.GenomeBrowser(
    fasta_path=fasta_path,
    gff_path=gff_path,
    bounds=(0, 100000),
    init_pos=50000,
    attributes=["gene", "locus_tag", "product"],
)
g.show()

Adding a data track
GenomeNotebook provides three types of pre-built tracks: line, scatter and bar (Track.line, Track.scatter and Track.bar).
A pandas DataFrame is passed as the source of the data. See the example usage below.
Plotting some ChIP-seq data with Track.line
import pyBigWig
import pandas as pd
import numpy as np

# Browser for the ChIP-seq example: the same example genome files, limited
# to the 60000-100000 window and opened at position 82000, with the
# `search` and `show_seq` options turned off.
g = gn.GenomeBrowser(
    fasta_path=fasta_path,
    gff_path=gff_path,
    init_pos=82000,
    bounds=(60000, 100000),
    attributes=["gene", "locus_tag", "product"],
    search=False,
    show_seq=False,
)
# Importing some coverage data from a BigWig file
bw_file_path = os.path.join(data_path, "ChIP-ACCCA-1.bw")
refname = 'NC_000913'
# Read the values of `refname` from position 0 to g.seq_len as a numpy array.
with pyBigWig.open(bw_file_path) as bw:
    cov = bw.values(refname, 0, g.seq_len, numpy=True)

# Keep one value out of every 10; "pos" is generated with the same step of 10
# so that both columns have the same length.
data = pd.DataFrame({
    "pos": np.arange(0, g.seq_len, 10),
    "cov": cov[::10],
})

# Draw the coverage as a line on a new track of the browser.
track = g.add_track()
track.line(
    data,
    pos="pos",
    y="cov",
    line_color="blue",  # check out the Bokeh documentation for other keyword arguments you can pass
    line_width=2,
)
g.show()

Note that pyBigWig cannot be installed on Windows. If you are a Windows user, you can still make this work by running genomenotebook in WSL.
Plotting some CRISPR screening data with Track.scatter and Track.bar
# Load the Cui 2018 CRISPRi screen data straight from its URL into a DataFrame.
cui2018data = pd.read_csv(
    "https://gitlab.pasteur.fr/dbikard/badSeed_public/raw/master/screen_data.csv"
)
cui2018data.head()

| | guide | gene | essential | pos | ori | coding | fit18 | fit75 | ntargets | seq |
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AAAAAACCTGCTGGTGAGGC | NaN | NaN | 2202483 | - | NaN | -4.850012 | -1.437546 | 1 | AAAGCAGATCACAGTAAATAAAAAAACCTGCTGGTGAGGCAGGTTC... |
| 1 | AAAAAACGTATTCGCTTGCA | curA | False | 1517891 | + | False | -0.094026 | -0.100313 | 1 | TGTTGATGGCTACAGTGCTGAAAAAACGTATTCGCTTGCAAGGTTT... |
| 2 | AAAAAAGCGCACTTTTTGAC | NaN | NaN | 1919717 | + | NaN | -1.109310 | -0.246740 | 1 | GTAACGCCTGACAGCGCACAAAAAAAGCGCACTTTTTGACTGGCAC... |
| 3 | AAAAAAGCGGTGACTTACGA | bglA | False | 3042929 | + | False | -1.328831 | -0.905068 | 1 | GCGCCCATATCGAAGAGATGAAAAAAGCGGTGACTTACGATGGCGT... |
| 4 | AAAAAATCTGCCCGTGTCGT | gyrA | True | 2337231 | - | False | -0.840373 | -0.598858 | 1 | ATGACTGGAACAAAGCCTATAAAAAATCTGCCCGTGTCGTTGGTGA... |
# Browser for the CRISPR screen example, limited to the 70000-110000 window,
# with the `search` and `show_seq` options turned off.
g = gn.GenomeBrowser(
    gff_path=gff_path,
    fasta_path=fasta_path,
    attributes=["gene", "locus_tag", "product"],
    bounds=(70000, 110000),
    show_seq=False,
    search=False,
)
# First track: scatter plot of the "fit75" column against "pos", with "ori"
# passed as factors and "fit18"/"guide" passed as hover data.
track = g.add_track(height=100)
track.scatter(
    data=cui2018data,
    pos="pos",
    y="fit75",
    factors="ori",
    hover_data=["fit18", "guide"],
)

# Second track: bar plot of the "fit18" column against "pos", with "ori"
# passed as factors and "fit75"/"guide" passed as hover data.
track2 = g.add_track(height=100)
track2.bar(
    data=cui2018data,
    pos="pos",
    y="fit18",
    factors="ori",
    hover_data=["fit75", "guide"],
)

g.show()