analysis

Functions for analyzing mutation rates in DGRec experiments. `mut_rate` computes the mutation rate per base within a specified sequence range from genotype data. The hallmark DGRec signature is an elevated mutation rate at A positions within the VR.
from dgrec.example_data import get_example_data_dir

source

mut_rate


def mut_rate(
    gen_list:list, # a genotype list with the number of molecules detected
    ran:tuple, # the position range in which to compute the mutation rate. If None the rate is computed for the full sequence.
    ref_seq:str, # reference sequence
):

Computes the mutation rate per base within the specified range. The result is indexed by base (`A`, `T`, `G`, `C`), plus `all` for the rate over every position in the range, as shown in the example below.

# Locate the example data directory provided by dgrec.example_data and parse
# the sacB genotype table stored in it.
data_path=get_example_data_dir()
gen_list=parse_genotypes(os.path.join(data_path,"sacB_genotypes.csv"))

# Read the first record of the reference FASTA file from the same directory
# and keep its sequence as a plain string.
read_ref_file="sacB_ref.fasta"
ref=next(SeqIO.parse(os.path.join(data_path,read_ref_file),"fasta"))
ref_seq=str(ref.seq)

# Show a few example entries: every 20th one from index 1 up to (but not
# including) 200. Each entry unpacks to a genotype and the number of
# molecules detected for it; they are printed as count, tab, genotype.
for g,n in gen_list[1:200:20]:
    print(n,"\t",g)
279      A91G
28   A68C
15   A72G,A79T,A91T
10   A61G,A72G
6    A61G,A68G
6    A68G,A76G,A91G
5    A61T,A79G
4    A86T
4    A72G,A76G,A86G,A91T
3    A61T,A76G,A91G
# Two position ranges to pass as the `ran` argument of mut_rate. The messages
# printed below label the first as "in VR" and the second as "outside VR".
# NOTE(review): the variables are named TR while the messages say VR — confirm
# which term is intended.
# NOTE(review): whether the bounds are inclusive, and 0- or 1-based, is not
# visible here — confirm against the mut_rate implementation.
TR_range=(50,119)
before_TR_range=(5,50)
# Mutation rates inside the first range. The result is iterated by key and
# indexed with that same key to read each rate.
mut_rate_TR=mut_rate(gen_list,TR_range,ref_seq)
for b in mut_rate_TR:
    print(f"Mutation rate in VR at {b} positions: {mut_rate_TR[b]:.1e}")

# Same computation for the range that ends where the first one starts, for comparison.
mut_rate_outside_TR=mut_rate(gen_list,before_TR_range,ref_seq)
for b in mut_rate_outside_TR:
    print(f"Mutation rate outside VR at {b} positions: {mut_rate_outside_TR[b]:.1e}")
Mutation rate in VR at A positions: 1.9e-02
Mutation rate in VR at T positions: 9.9e-04
Mutation rate in VR at G positions: 1.1e-04
Mutation rate in VR at C positions: 2.2e-04
Mutation rate in VR at all positions: 2.9e-03
Mutation rate outside VR at A positions: 2.1e-05
Mutation rate outside VR at T positions: 4.8e-05
Mutation rate outside VR at G positions: 1.8e-04
Mutation rate outside VR at C positions: 5.9e-05
Mutation rate outside VR at all positions: 6.7e-05