A genome browser in your Jupyter notebook
Project description
Getting Started
Documentation
https://dbikard.github.io/genomenotebook/
Install
pip install genomenotebook
How to use
Create a simple genome browser with a search bar. The sequence appears when zooming in.
#Using the example E. coli genome data from the package
import genomenotebook as gn
import os
from bokeh.io import output_notebook
output_notebook()
data_path = gn.get_example_data_dir()
genome_path = os.path.join(data_path, "MG1655_U00096.fasta")
gff_path = os.path.join(data_path, "MG1655_U00096.gff3")
g=gn.GenomeBrowser(genome_path=genome_path, gff_path=gff_path, init_pos=100000)
g.show()
<style>
.bk-notebook-logo {
display: block;
width: 20px;
height: 20px;
background-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAABx0RVh0U29mdHdhcmUAQWRvYmUgRmlyZXdvcmtzIENTNui8sowAAAOkSURBVDiNjZRtaJVlGMd/1/08zzln5zjP1LWcU9N0NkN8m2CYjpgQYQXqSs0I84OLIC0hkEKoPtiH3gmKoiJDU7QpLgoLjLIQCpEsNJ1vqUOdO7ppbuec5+V+rj4ctwzd8IIbbi6u+8f1539dt3A78eXC7QizUF7gyV1fD1Yqg4JWz84yffhm0qkFqBogB9rM8tZdtwVsPUhWhGcFJngGeWrPzHm5oaMmkfEg1usvLFyc8jLRqDOMru7AyC8saQr7GG7f5fvDeH7Ej8CM66nIF+8yngt6HWaKh7k49Soy9nXurCi1o3qUbS3zWfrYeQDTB/Qj6kX6Ybhw4B+bOYoLKCC9H3Nu/leUTZ1JdRWkkn2ldcCamzrcf47KKXdAJllSlxAOkRgyHsGC/zRday5Qld9DyoM4/q/rUoy/CXh3jzOu3bHUVZeU+DEn8FInkPBFlu3+nW3Nw0mk6vCDiWg8CeJaxEwuHS3+z5RgY+YBR6V1Z1nxSOfoaPa4LASWxxdNp+VWTk7+4vzaou8v8PN+xo+KY2xsw6une2frhw05CTYOmQvsEhjhWjn0bmXPjpE1+kplmmkP3suftwTubK9Vq22qKmrBhpY4jvd5afdRA3wGjFAgcnTK2s4hY0/GPNIb0nErGMCRxWOOX64Z8RAC4oCXdklmEvcL8o0BfkNK4lUg9HTl+oPlQxdNo3Mg4Nv175e/1LDGzZen30MEjRUtmXSfiTVu1kK8W4txyV6BMKlbgk3lMwYCiusNy9fVfvvwMxv8Ynl6vxoByANLTWplvuj/nF9m2+PDtt1eiHPBr1oIfhCChQMBw6Aw0UulqTKZdfVvfG7VcfIqLG9bcldL/+pdWTLxLUy8Qq38heUIjh4XlzZxzQm19lLFlr8vdQ97rjZVOLf8nclzckbcD4wxXMidpX30sFd37Fv/GtwwhzhxGVAprjbg0gCAEeIgwCZyTV2Z1REEW8O4py0wsjeloKoMr6iCY6dP92H6Vw/oTyICIthibxjm/DfN9lVz8IqtqKYLUXfoKVMVQVVJOElGjrnnUt9T9wbgp8AyYKaGlqingHZU/uG2NTZSVqwHQTWkx9hxjkpWDaCg6Ckj5qebgBVbT3V3NNXMSiWSDdGV3hrtzla7J+duwPOToIg42ChPQOQjspnSlp1V+Gjdged7+8UN5CRAV7a5EdFNwCjEaBR27b3W890TE7g24NAP/mMDXRWrGoFPQI9ls/MWO2dWFAar/xcOIImbbpA3zgAAAABJRU5ErkJggg==);
}
</style>
<div>
<a href="https://bokeh.org" target="_blank" class="bk-notebook-logo"></a>
<span id="p96047">Loading BokehJS ...</span>
</div>
Unable to display output for mime type(s): application/javascript, application/vnd.bokehjs_load.v0+json
Unable to display output for mime type(s): application/javascript, application/vnd.bokehjs_exec.v0+json
Adding a track with random points as a demonstration. genomeNotebook uses the Bokeh library, and track.fig is a plain Bokeh figure on which you can plot anything you want using Bokeh.
import numpy as np
track = g.add_track()
x= np.arange(0,100000,100)
y= np.random.randint(0,10,size=x.shape)
track.fig.scatter(x=x,y=y)
g.show()
Unable to display output for mime type(s): application/javascript, application/vnd.bokehjs_exec.v0+json
Plotting some ChIP-seq data
Track objects also have a few custom plotting functions: line, scatter and bar.
A pandas DataFrame is passed as the source of the data, as follows:
import pyBigWig
import pandas as pd
g=gn.GenomeBrowser(genome_path=genome_path,
gff_path=gff_path,
bounds=(0,100000),
search=False,
show_seq=False)
#Importing some coverage data from a BigWig file
bw_file_path=os.path.join(data_path,"ChIP-ACCCA-1.bw")
refname='NC_000913'
with pyBigWig.open(bw_file_path) as bw:
cov=bw.values(refname,0,g.seq_len,numpy=True)
source=pd.DataFrame({"pos": np.arange(0,g.seq_len,10),
"cov": cov[::10]})
track=g.add_track()
track.line(source,pos="pos",y="cov")
g.show()
Unable to display output for mime type(s): application/javascript, application/vnd.bokehjs_exec.v0+json
Plotting some CRISPR screening data
#Opening the Cui 2018 CRISPRi screen data
cui2018data="https://gitlab.pasteur.fr/dbikard/badSeed_public/raw/master/screen_data.csv"
cui2018data=pd.read_csv(cui2018data,index_col=0)
cui2018data.head()
<style>
.bk-notebook-logo {
display: block;
width: 20px;
height: 20px;
background-image: url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAABx0RVh0U29mdHdhcmUAQWRvYmUgRmlyZXdvcmtzIENTNui8sowAAAOkSURBVDiNjZRtaJVlGMd/1/08zzln5zjP1LWcU9N0NkN8m2CYjpgQYQXqSs0I84OLIC0hkEKoPtiH3gmKoiJDU7QpLgoLjLIQCpEsNJ1vqUOdO7ppbuec5+V+rj4ctwzd8IIbbi6u+8f1539dt3A78eXC7QizUF7gyV1fD1Yqg4JWz84yffhm0qkFqBogB9rM8tZdtwVsPUhWhGcFJngGeWrPzHm5oaMmkfEg1usvLFyc8jLRqDOMru7AyC8saQr7GG7f5fvDeH7Ej8CM66nIF+8yngt6HWaKh7k49Soy9nXurCi1o3qUbS3zWfrYeQDTB/Qj6kX6Ybhw4B+bOYoLKCC9H3Nu/leUTZ1JdRWkkn2ldcCamzrcf47KKXdAJllSlxAOkRgyHsGC/zRday5Qld9DyoM4/q/rUoy/CXh3jzOu3bHUVZeU+DEn8FInkPBFlu3+nW3Nw0mk6vCDiWg8CeJaxEwuHS3+z5RgY+YBR6V1Z1nxSOfoaPa4LASWxxdNp+VWTk7+4vzaou8v8PN+xo+KY2xsw6une2frhw05CTYOmQvsEhjhWjn0bmXPjpE1+kplmmkP3suftwTubK9Vq22qKmrBhpY4jvd5afdRA3wGjFAgcnTK2s4hY0/GPNIb0nErGMCRxWOOX64Z8RAC4oCXdklmEvcL8o0BfkNK4lUg9HTl+oPlQxdNo3Mg4Nv175e/1LDGzZen30MEjRUtmXSfiTVu1kK8W4txyV6BMKlbgk3lMwYCiusNy9fVfvvwMxv8Ynl6vxoByANLTWplvuj/nF9m2+PDtt1eiHPBr1oIfhCChQMBw6Aw0UulqTKZdfVvfG7VcfIqLG9bcldL/+pdWTLxLUy8Qq38heUIjh4XlzZxzQm19lLFlr8vdQ97rjZVOLf8nclzckbcD4wxXMidpX30sFd37Fv/GtwwhzhxGVAprjbg0gCAEeIgwCZyTV2Z1REEW8O4py0wsjeloKoMr6iCY6dP92H6Vw/oTyICIthibxjm/DfN9lVz8IqtqKYLUXfoKVMVQVVJOElGjrnnUt9T9wbgp8AyYKaGlqingHZU/uG2NTZSVqwHQTWkx9hxjkpWDaCg6Ckj5qebgBVbT3V3NNXMSiWSDdGV3hrtzla7J+duwPOToIg42ChPQOQjspnSlp1V+Gjdged7+8UN5CRAV7a5EdFNwCjEaBR27b3W890TE7g24NAP/mMDXRWrGoFPQI9ls/MWO2dWFAar/xcOIImbbpA3zgAAAABJRU5ErkJggg==);
}
</style>
<div>
<a href="https://bokeh.org" target="_blank" class="bk-notebook-logo"></a>
<span id="p122276">Loading BokehJS ...</span>
</div>
Unable to display output for mime type(s): application/javascript, application/vnd.bokehjs_load.v0+json
<style scoped>
.dataframe tbody tr th:only-of-type {
vertical-align: middle;
}
.dataframe tbody tr th {
vertical-align: top;
}
.dataframe thead th {
text-align: right;
}
</style>
guide | gene | essential | pos | ori | coding | fit18 | fit75 | ntargets | seq |
---|---|---|---|---|---|---|---|---|---|
AAAAAACCTGCTGGTGAGGC | NaN | NaN | 2202483 | - | NaN | -4.850012 | -1.437546 | 1 | AAAGCAGATCACAGTAAATAAAAAAACCTGCTGGTGAGGCAGGTTC... |
AAAAAACGTATTCGCTTGCA | curA | False | 1517891 | + | False | -0.094026 | -0.100313 | 1 | TGTTGATGGCTACAGTGCTGAAAAAACGTATTCGCTTGCAAGGTTT... |
AAAAAAGCGCACTTTTTGAC | NaN | NaN | 1919717 | + | NaN | -1.109310 | -0.246740 | 1 | GTAACGCCTGACAGCGCACAAAAAAAGCGCACTTTTTGACTGGCAC... |
AAAAAAGCGGTGACTTACGA | bglA | False | 3042929 | + | False | -1.328831 | -0.905068 | 1 | GCGCCCATATCGAAGAGATGAAAAAAGCGGTGACTTACGATGGCGT... |
AAAAAATCTGCCCGTGTCGT | gyrA | True | 2337231 | - | False | -0.840373 | -0.598858 | 1 | ATGACTGGAACAAAGCCTATAAAAAATCTGCCCGTGTCGTTGGTGA... |
g=gn.GenomeBrowser(genome_path=genome_path,
gff_path=gff_path,
bounds=(0,1000000),
search=False,
show_seq=False)
track=g.add_track(height=100)
track.scatter(source=cui2018data,pos="pos",y="fit75",factors="ori")
track2=g.add_track(height=100)
track2.scatter(source=cui2018data,pos="pos",y="fit18",factors="ori")
g.show()
Unable to display output for mime type(s): application/javascript, application/vnd.bokehjs_exec.v0+json
Working with multiple chromosomes / contigs
import itertools
from Bio import SeqIO
genome_path = os.path.join(data_path, "jmh43.fna")
gff_path = os.path.join(data_path, "jmh43.gff")
for rec in itertools.islice(SeqIO.parse(genome_path,"fasta"),5):
g=gn.GenomeBrowser(genome_path=genome_path,
gff_path=gff_path,
seq_id=rec.id,
search=False)
g.show()
Unable to display output for mime type(s): application/javascript, application/vnd.bokehjs_exec.v0+json
Unable to display output for mime type(s): application/javascript, application/vnd.bokehjs_exec.v0+json
Unable to display output for mime type(s): application/javascript, application/vnd.bokehjs_exec.v0+json
Unable to display output for mime type(s): application/javascript, application/vnd.bokehjs_exec.v0+json
Unable to display output for mime type(s): application/javascript, application/vnd.bokehjs_exec.v0+json
Project details
Release history Release notifications | RSS feed
Download files
Download the file for your platform. If you're not sure which to choose, learn more about installing packages.
Source Distribution
genomenotebook-0.4.2.tar.gz
(21.3 MB
view hashes)
Built Distribution
Close
Hashes for genomenotebook-0.4.2-py3-none-any.whl
Algorithm | Hash digest |
---|---|
SHA256 | 0020df8975aab1ecb056858183d53c14cd10c2f334fe5d78af62e213140a6429 |
MD5 | 8cbd153861284f73f675cbcc0dbb6111 |
BLAKE2b-256 | 1e7c83974182bf440bcd9a6480d919c59273926b27de6fb659d372d6f8abd308 |