import genomenotebook as gn
import os
Examples
Simple browser
Create a simple genome browser with a search bar. The sequence appears when zooming in.
# Using the example E. coli genome data from the package
data_path = gn.get_example_data_dir()
fasta_path = os.path.join(data_path, "MG1655_U00096.fasta")
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")

# Build the browser from the annotation (GFF3) and sequence (FASTA) files.
g = gn.GenomeBrowser(
    gff_path=gff_path,
    fasta_path=fasta_path,
    attributes=["gene", "locus_tag", "product"],
    init_pos=50000,
    bounds=(0, 100000),  # applying bounds avoids loading the whole genome in memory if you don't need it
)
g.show()
Adding a data track
GenomeNotebook provides three types of pre-built tracks: line, scatter, and bar.
A pandas DataFrame is passed as the source of the data. See the example usage below.
Plotting some ChIP-seq data with Track.line
import pyBigWig
import pandas as pd
import numpy as np
# Browser restricted to the 60-100 kb window, with the search bar and
# sequence display turned off.
g = gn.GenomeBrowser(
    fasta_path=fasta_path,
    gff_path=gff_path,
    init_pos=82000,
    bounds=(60000, 100000),
    attributes=["gene", "locus_tag", "product"],
    search=False,
    show_seq=False,
)

# Importing some coverage data from a BigWig file
bw_file_path = os.path.join(data_path, "ChIP-ACCCA-1.bw")
refname = 'NC_000913'
with pyBigWig.open(bw_file_path) as bw:
    cov = bw.values(refname, 0, g.seq_len, numpy=True)

# Keep one coverage value every 10 positions so that "pos" and "cov"
# have matching lengths.
data = pd.DataFrame({
    "pos": np.arange(0, g.seq_len, 10),
    "cov": cov[::10],
})

track = g.add_track()
track.line(
    data,
    pos="pos",
    y="cov",
    line_color="blue",  # check out the Bokeh documentation for other keyword arguments you can pass
    line_width=2,
)
g.show()
Note that pyBigWig cannot be installed on Windows. If you are a Windows user, you can still make this work by running genomenotebook in WSL.
Plotting some CRISPR screening data with Track.scatter and Track.bar
# Opening the Cui 2018 CRISPRi screen data
# The name first holds the CSV URL and is then rebound to the loaded DataFrame.
cui2018data = "https://gitlab.pasteur.fr/dbikard/badSeed_public/raw/master/screen_data.csv"
cui2018data = pd.read_csv(cui2018data)
cui2018data.head()
 | guide | gene | essential | pos | ori | coding | fit18 | fit75 | ntargets | seq |
---|---|---|---|---|---|---|---|---|---|---|
0 | AAAAAACCTGCTGGTGAGGC | NaN | NaN | 2202483 | - | NaN | -4.850012 | -1.437546 | 1 | AAAGCAGATCACAGTAAATAAAAAAACCTGCTGGTGAGGCAGGTTC... |
1 | AAAAAACGTATTCGCTTGCA | curA | False | 1517891 | + | False | -0.094026 | -0.100313 | 1 | TGTTGATGGCTACAGTGCTGAAAAAACGTATTCGCTTGCAAGGTTT... |
2 | AAAAAAGCGCACTTTTTGAC | NaN | NaN | 1919717 | + | NaN | -1.109310 | -0.246740 | 1 | GTAACGCCTGACAGCGCACAAAAAAAGCGCACTTTTTGACTGGCAC... |
3 | AAAAAAGCGGTGACTTACGA | bglA | False | 3042929 | + | False | -1.328831 | -0.905068 | 1 | GCGCCCATATCGAAGAGATGAAAAAAGCGGTGACTTACGATGGCGT... |
4 | AAAAAATCTGCCCGTGTCGT | gyrA | True | 2337231 | - | False | -0.840373 | -0.598858 | 1 | ATGACTGGAACAAAGCCTATAAAAAATCTGCCCGTGTCGTTGGTGA... |
# Browser restricted to the 70-110 kb window, with the search bar and
# sequence display turned off.
g = gn.GenomeBrowser(
    fasta_path=fasta_path,
    gff_path=gff_path,
    bounds=(70000, 110000),
    attributes=["gene", "locus_tag", "product"],
    search=False,
    show_seq=False,
)

# First track: fit75 values drawn as a scatter plot, with "ori" passed as factors.
track = g.add_track(height=100)
track.scatter(
    data=cui2018data,
    pos="pos",
    y="fit75",
    factors="ori",
    hover_data=["fit18", "guide"],
)

# Second track: fit18 values drawn as bars, with "ori" passed as factors.
track2 = g.add_track(height=100)
track2.bar(
    data=cui2018data,
    pos="pos",
    y="fit18",
    factors="ori",
    hover_data=["fit75", "guide"],
)
g.show()