Skip to content

Commit

Permalink
more cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
Your Name committed May 11, 2024
1 parent 0550ed6 commit 8dc816d
Show file tree
Hide file tree
Showing 9 changed files with 41 additions and 67 deletions.
8 changes: 0 additions & 8 deletions PosteriorProb.h

This file was deleted.

12 changes: 6 additions & 6 deletions likelihood.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "likelihood.h"

#include <cstring>
#include <cstdlib>
#include <cstdio>
Expand All @@ -20,11 +20,11 @@
#include "profile.h"
#include "bfgs.h"
#include "htslib/bgzf.h"

#include "likelihood.h"
#include "misc.h"
#include "recalibration.h"
#include "ngsBriggs.h"
#include "PosteriorProb.h"
//#include "PosteriorProb.h"

double PhredError[255];
double PhredErrorAThird[255];
Expand Down Expand Up @@ -1475,7 +1475,7 @@ void like_hess_master(const double *xs,double **y){
}
}

double ErrorLik(char reffrag[], char frag[], int L, uint8_t seqError[]){
double ErrorLik(char reffrag[], char frag[], int L, uint8_t seqError[],int l_check){
double l1 = 0;
for(int i=0;0&&i<L;i++)
fprintf(stderr,"[%d] %d\n",i,seqError[i]);
Expand Down Expand Up @@ -1524,7 +1524,7 @@ double ErrorLik(char reffrag[], char frag[], int L, uint8_t seqError[]){

// Calculate the observation likelihood based on the Ancient model/ biotin model
// additive mode -> multiplicity model + speed up
double PMDLik_b(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv, uint8_t seqError[],double Tol){
double PMDLik_b(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv, uint8_t seqError[],double Tol,int l_check){
#if 0
for(int i=0;i<L;i++){
fprintf(stderr,"%d) reffrag: %d frag: %d L:%d lambda: %f delta: %f delata_s: %f nv: %f seqerr: %d tol: %e\n",i,reffrag[i],frag[i],L,lambda,delta,delta_s,nv,(int) seqError[i],Tol);
Expand Down Expand Up @@ -2072,7 +2072,7 @@ double PMDLik_b(char reffrag[], char frag[], int L, double lambda, double delta,

// The function below is for calculating likelihood of the reverse-complementary strand of the "biotin model" strand, the name is just for simplicity.
// additive -> mulplicative
double PMDLik_nb(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv, uint8_t seqError[],double Tol){
double PMDLik_nb(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv, uint8_t seqError[],double Tol,int l_check){
double l_pmd=0; // Likelihood
double p = 0; // Accumulated prob of (l,r)
double exterm_s = 0;
Expand Down
6 changes: 3 additions & 3 deletions likelihood.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,10 @@ void like_grad_master(const double *xs,double *y,const void *);

void like_hess_master(const double *xs,double **y);

double ErrorLik(char reffrag[], char frag[], int L, uint8_t seqError[]);
double ErrorLik(char reffrag[], char frag[], int L, uint8_t seqError[],int l_check);

double PMDLik_b(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv, uint8_t seqError[],double Tol);
double PMDLik_b(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv, uint8_t seqError[],double Tol,int l_check);

double PMDLik_nb(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv, uint8_t seqError[],double Tol);
double PMDLik_nb(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv, uint8_t seqError[],double Tol,int l_check);

#endif
10 changes: 5 additions & 5 deletions ngsBriggs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,23 +16,23 @@
#include "misc.h"
#include "recalibration.h"
#include "likelihood.h"
#include "PosteriorProb.h"
#include "pp.h"
#include "ngsBriggs_cli.h"
#include "ngsBriggs.h"
#include "bdamagereader.h"


// definining all global variables used across multiple scripts

double l_check = 15;

tsk_struct *my_tsk_struct = NULL;

int tsk_nthreads = -1;


// defining our main ngsBriggs function
int main(int argc, char **argv){

double l_check = 15;
double** mm5p, **mm3p;
double Tol = 1.0E-8; // Tolerance
double **deamRateCT;
Expand Down Expand Up @@ -475,7 +475,7 @@ int main(int argc, char **argv){
}
bam_hdr_t *hdr = sam_hdr_read(in);
int ndim = 0;
double **mat = read_all_reads(in,hdr,seq_ref,len_limit,invec2[0],invec2[1],invec2[2],invec2[3],Tol,ndim,model);
double **mat = read_all_reads(in,hdr,seq_ref,len_limit,invec2[0],invec2[1],invec2[2],invec2[3],Tol,ndim,model,l_check);

//tsk stop
//double lbd2[4] = {30,1e-8,30,1e-8};
Expand Down Expand Up @@ -533,7 +533,7 @@ int main(int argc, char **argv){
kstr3->l = kstr3->m = 0;

if (mypars->ihts!=NULL){
bam_hdr_t* hdr = calc_pp_pmd_prob(refName,mypars->hts,mypars->ohts,mapped_only,se_only,mapq,seq_ref,len_limit,len_min,model,Contam_eps,invec2[0],invec2[1],invec2[2],invec2[3],distparam[0],distparam[1],distparam[2], distparam[3], mypars->dorecal,&str_cli,deamRateCT,deamRateGA,Tol);
bam_hdr_t* hdr = calc_pp_pmd_prob(refName,mypars->hts,mypars->ohts,mapped_only,se_only,mapq,seq_ref,len_limit,len_min,model,Contam_eps,invec2[0],invec2[1],invec2[2],invec2[3],distparam[0],distparam[1],distparam[2], distparam[3], mypars->dorecal,&str_cli,deamRateCT,deamRateGA,Tol,l_check);
sam_hdr_destroy(hdr);
}

Expand Down
4 changes: 0 additions & 4 deletions ngsBriggs.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
#ifndef NGSBRIGGS_H
#define NGSBRIGGS_H
extern int MAXLENGTH;
extern double l_check;

extern char refToChar[256];
extern char com[5];

#endif

53 changes: 17 additions & 36 deletions PosteriorProb.cpp → pp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
#include <htslib/faidx.h>
#include "ngsBriggs.h"
#include "likelihood.h"
#include "PosteriorProb.h"
#include "pp.h"
#include "misc.h"
#include "profile.h"

typedef unsigned char uchar;


double NoPMDGivenAnc_b(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv,double Tol){
double NoPMDGivenAnc_b(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv,double Tol,int l_check){
double np_pmd_anc=0; // probability of no pmd given ancient
double p = 0;
//Investigate each possible left and right 5' overhang pair with (l,r) <= l_check (15)
Expand Down Expand Up @@ -402,7 +402,7 @@ double NoPMDGivenAnc_b(char reffrag[], char frag[], int L, double lambda, double
}


double NoPMDGivenAnc_nb(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv,double Tol){
double NoPMDGivenAnc_nb(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv,double Tol,int l_check){
double np_pmd_anc=0; // Likelihood
double p = 0; // Accumulated prob of (l,r)
//Investigate each possible left and right 5' overhang pair with (l,r) <= l_check (15)
Expand Down Expand Up @@ -814,8 +814,8 @@ double NoPMDGivenAnc_nb(char reffrag[], char frag[], int L, double lambda, doubl
}

// The function below is for calculating the posterior prob of being ancient
double AncProb(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv, uint8_t seqError[], int model, double eps, double anc_mu, double anc_si, double mod_mu, double mod_si, int isrecal, int len_limit, int len_min,double Tol){
double l_err = ErrorLik(reffrag, frag, L, seqError); // Modern Likelihood/Only Sequencing-error Likelihood
double AncProb(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv, uint8_t seqError[], int model, double eps, double anc_mu, double anc_si, double mod_mu, double mod_si, int isrecal, int len_limit, int len_min,double Tol,int l_check){
double l_err = ErrorLik(reffrag, frag, L, seqError,l_check); // Modern Likelihood/Only Sequencing-error Likelihood
double prior_anc = 1-eps;
double post_anc;
double l_anc_l = 1;
Expand All @@ -833,12 +833,12 @@ double AncProb(char reffrag[], char frag[], int L, double lambda, double delta,
l_err_l = NormalINC(y_max2, y_min2, x_max2, x_min2);
}
if (model==0){
double l_anc_b = PMDLik_b(reffrag, frag, L, lambda, delta, delta_s, nv, seqError,Tol); // Ancient Likelihood based on biotin model
double l_anc_b = PMDLik_b(reffrag, frag, L, lambda, delta, delta_s, nv, seqError,Tol,l_check); // Ancient Likelihood based on biotin model
post_anc = prior_anc * l_anc_b * l_anc_l/(prior_anc * l_anc_b * l_anc_l+ (1-prior_anc) * l_err * l_err_l);
//double test = exp(-10);
//cout<<prior_anc<<" "<<l_anc_b<<" "<<l_anc_l<<" "<<prior_anc * l_anc_b * l_anc_l<<" "<<test<<"Lei Lei Lei\n";
}else if(model==1){
double l_anc_nb = 0.5*PMDLik_b(reffrag, frag, L, lambda, delta, delta_s, nv, seqError,Tol)+0.5*PMDLik_nb(reffrag, frag, L, lambda, delta, delta_s, nv, seqError,Tol); // Ancient Likelihood based on non-biotin model
double l_anc_nb = 0.5*PMDLik_b(reffrag, frag, L, lambda, delta, delta_s, nv, seqError,Tol,l_check)+0.5*PMDLik_nb(reffrag, frag, L, lambda, delta, delta_s, nv, seqError,Tol,l_check); // Ancient Likelihood based on non-biotin model
// cout << "l_anc_nb is "<<l_anc_nb<<"\n";
// cout << "PMDLik_b is "<<PMDLik_b(reffrag, frag, L, lambda, delta, delta_s, nv, seqError)<<"\n";
// cout << "PMDLik_nb is "<<PMDLik_nb(reffrag, frag, L, lambda, delta, delta_s, nv, seqError)<<"\n";
Expand All @@ -854,18 +854,18 @@ double AncProb(char reffrag[], char frag[], int L, double lambda, double delta,


// The function below is for calculating the posterior prob of being damaged given ancient
double PMDProb(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv, uint8_t seqError[], int model,double Tol){
double l_err = ErrorLik(reffrag, frag, L, seqError); // Modern Likelihood/Only Sequencing-error Likelihood
double PMDProb(char reffrag[], char frag[], int L, double lambda, double delta, double delta_s, double nv, uint8_t seqError[], int model,double Tol,int l_check){
double l_err = ErrorLik(reffrag, frag, L, seqError,l_check); // Modern Likelihood/Only Sequencing-error Likelihood
//double prior_pmd = 0.25+0.5*lambda/(1-pow(1-lambda,L-1))*(1-pow((1-lambda)*(1-delta_s/4)/(1-delta/4),L-1))/(1-(1-lambda)*(1-delta_s/4)/(1-delta/4))+0.25*pow(lambda,2)/pow(1-pow(1-lambda,L-1),2)*(1-pow((1-lambda)*(1-delta_s/4)/(1-delta/4),L-1))/pow(1-(1-lambda)*(1-delta_s/4)/(1-delta/4),2)-0.25*pow(lambda,2)/pow(1-pow(1-lambda,L-1),2)*(L-1)*pow((1-lambda)*(1-delta_s/4)/(1-delta/4),L-1)/(1-(1-lambda)*(1-delta_s/4)/(1-delta/4));
//prior_pmd = 1 - prior_pmd * pow(1-delta,L)/(0.75+0.25*(1-pow(1-lambda,L-1)-(L-1)*lambda*pow(1-lambda,L-1))/pow(1-pow(1-lambda,L-1),2));
double post_pmd;
if (model==0){
double l_anc_b = PMDLik_b(reffrag, frag, L, lambda, delta, delta_s, nv, seqError,Tol);
double np_pmd_anc_b = NoPMDGivenAnc_b(reffrag, frag, L, lambda, delta, delta_s, nv,Tol);
double l_anc_b = PMDLik_b(reffrag, frag, L, lambda, delta, delta_s, nv, seqError,Tol,l_check);
double np_pmd_anc_b = NoPMDGivenAnc_b(reffrag, frag, L, lambda, delta, delta_s, nv,Tol,l_check);
post_pmd = 1-np_pmd_anc_b*l_err/l_anc_b;
}else if(model==1){
double l_anc_nb = 0.5*PMDLik_b(reffrag, frag, L, lambda, delta, delta_s, nv, seqError,Tol) + 0.5*PMDLik_nb(reffrag, frag, L, lambda, delta, delta_s, nv, seqError,Tol);
double np_pmd_anc_nb = 0.5*NoPMDGivenAnc_b(reffrag, frag, L, lambda, delta, delta_s, nv,Tol)+0.5*NoPMDGivenAnc_nb(reffrag, frag, L, lambda, delta, delta_s, nv,Tol);
double l_anc_nb = 0.5*PMDLik_b(reffrag, frag, L, lambda, delta, delta_s, nv, seqError,Tol,l_check) + 0.5*PMDLik_nb(reffrag, frag, L, lambda, delta, delta_s, nv, seqError,Tol,l_check);
double np_pmd_anc_nb = 0.5*NoPMDGivenAnc_b(reffrag, frag, L, lambda, delta, delta_s, nv,Tol,l_check)+0.5*NoPMDGivenAnc_nb(reffrag, frag, L, lambda, delta, delta_s, nv,Tol,l_check);
post_pmd = 1-np_pmd_anc_nb*l_err/l_anc_nb;
}
if (post_pmd < 0){
Expand All @@ -876,7 +876,7 @@ double PMDProb(char reffrag[], char frag[], int L, double lambda, double delta,
return post_pmd;
}

bam_hdr_t* calc_pp_pmd_prob(char *refName,char *ifname, char* ofname, int mapped_only,int se_only, int mapq, faidx_t *seq_ref, int len_limit, int len_min, int model, double eps, double lambda, double delta, double delta_s, double nv, double anc_mu, double anc_si, double mod_mu, double mod_si, int isrecal, kstring_t *str_cli,double **deamRateCT,double **deamRateGA,double Tol)
bam_hdr_t* calc_pp_pmd_prob(char *refName,char *ifname, char* ofname, int mapped_only,int se_only, int mapq, faidx_t *seq_ref, int len_limit, int len_min, int model, double eps, double lambda, double delta, double delta_s, double nv, double anc_mu, double anc_si, double mod_mu, double mod_si, int isrecal, kstring_t *str_cli,double **deamRateCT,double **deamRateGA,double Tol,int l_check)
{
// exit(0);
char nuc[6] = "ACGTN";
Expand Down Expand Up @@ -1010,35 +1010,16 @@ bam_hdr_t* calc_pp_pmd_prob(char *refName,char *ifname, char* ofname, int mapped
}


double PostAncProb1 = AncProb(yourrefe, yourread, b->core.l_qseq, lambda, delta, delta_s, nv, yourqual, model ,eps, anc_mu, anc_si, mod_mu, mod_si, 0, len_limit, len_min,Tol);
double PostAncProb1 = AncProb(yourrefe, yourread, b->core.l_qseq, lambda, delta, delta_s, nv, yourqual, model ,eps, anc_mu, anc_si, mod_mu, mod_si, 0, len_limit, len_min,Tol,l_check);
double PostAncProb2 = 0;

if (isrecal==1) {
PostAncProb2 = AncProb(yourrefe, yourread, b->core.l_qseq, lambda, delta, delta_s, nv, yourqual, model ,eps, anc_mu, anc_si, mod_mu, mod_si, 1, len_limit, len_min,Tol);
PostAncProb2 = AncProb(yourrefe, yourread, b->core.l_qseq, lambda, delta, delta_s, nv, yourqual, model ,eps, anc_mu, anc_si, mod_mu, mod_si, 1, len_limit, len_min,Tol,l_check);
}

double PostPMDProb = PMDProb(yourrefe, yourread, b->core.l_qseq, lambda, delta, delta_s, nv, yourqual, model,Tol);
double PostPMDProb = PMDProb(yourrefe, yourread, b->core.l_qseq, lambda, delta, delta_s, nv, yourqual, model,Tol,l_check);
//cout << "length 0" << " " << b->core.l_qseq << "\n";

#if 0
// Read Name
std::cout << bam_get_qname(b) << "\t";
cout << bam_aux_get(b,"FL") << "\t";
for (int cycle=0;cycle<b->core.l_qseq;cycle++)
std::cout<<nuc[(int)refToChar[myread[cycle]]];
std::cout<<"\t";
cout << "length 2" << b->core.l_qseq << "\n";
for (int cycle=0;cycle<b->core.l_qseq;cycle++)
std::cout<<nuc[(int)refToChar[myrefe[cycle]]];
std::cout<<"\t";

for (int cycle=0;cycle<b->core.l_qseq;cycle++)
std::cout<<(char)(bam_get_qual(b)[cycle]+33);
if (isrecal==1)
std::cout<<"\t"<<"AO:f:"<<PostAncProb1<<"\t"<<"AN:f:"<<PostAncProb2<<"\t"<<"PD:f:"<<PostPMDProb<<"\n";
else
std::cout<<"\t"<<"AN:f:"<<PostAncProb1<<"\t"<<"PD:f:"<<PostPMDProb<<"\n";
#endif
//Bam output is the bam file orientation
if (isrecal==1){
bam_aux_update_float(b,"AO",PostAncProb1);
Expand Down
5 changes: 5 additions & 0 deletions pp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#ifndef POSTERPROB_H
#define POSTERPROB_H

bam_hdr_t* calc_pp_pmd_prob(char *refName,char *fname, char* ofname, int mapped_only,int se_only, int mapq, faidx_t *seq_ref, int len_limit, int len_min, int model, double eps, double lambda, double delta, double delta_s, double nv, double anc_mu, double anc_si, double mod_mu, double mod_si, int isrecal, kstring_t *str_cli,double **deamRateCT,double **deamRateGA,double Tol,int l_check);
#endif
8 changes: 4 additions & 4 deletions read_all_reads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#include "misc.h"


double **read_all_reads(samFile *in,sam_hdr_t *hdr,faidx_t *seq_ref,int len_limit,double lambda,double delta,double delta_s,double nv,double Tol,int &ndim, int model){
double **read_all_reads(samFile *in,sam_hdr_t *hdr,faidx_t *seq_ref,int len_limit,double lambda,double delta,double delta_s,double nv,double Tol,int &ndim, int model,int l_check){

char reconstructedRef[512];
char myread[512];
Expand Down Expand Up @@ -76,12 +76,12 @@ double **read_all_reads(samFile *in,sam_hdr_t *hdr,faidx_t *seq_ref,int len_limi
fprintf(stderr,"rrr: %d %d \n",yourread[dist5p] ,yourrefe[dist5p]);
//}
}
double l_err = ErrorLik(yourrefe, yourread, b->core.l_qseq, yourqual);
double l_anc = PMDLik_b(yourrefe, yourread, b->core.l_qseq, lambda, delta, delta_s, nv, yourqual,Tol);
double l_err = ErrorLik(yourrefe, yourread, b->core.l_qseq, yourqual,l_check);
double l_anc = PMDLik_b(yourrefe, yourread, b->core.l_qseq, lambda, delta, delta_s, nv, yourqual,Tol,l_check);
// fprintf(stderr,"pmdlik_b: %f nb: %f model: %d\n",PMDLik_b(yourrefe, yourread, b->core.l_qseq, lambda, delta, delta_s, nv, yourqual,Tol),PMDLik_nb(yourrefe, yourread, b->core.l_qseq, lambda, delta, delta_s, nv, yourqual,Tol),model);

if(model==1)
l_anc = 0.5*l_anc + 0.5*PMDLik_nb(yourrefe, yourread, b->core.l_qseq, lambda, delta, delta_s, nv, yourqual,Tol);
l_anc = 0.5*l_anc + 0.5*PMDLik_nb(yourrefe, yourread, b->core.l_qseq, lambda, delta, delta_s, nv, yourqual,Tol,l_check);
// fprintf(stderr,"lanc: %f\n",l_anc);exit(0);
vec.push_back(l_err);
vec.push_back(l_anc);
Expand Down
2 changes: 1 addition & 1 deletion read_all_reads.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@
#include <htslib/sam.h>
#include <htslib/faidx.h>

double **read_all_reads(samFile *in,sam_hdr_t *hdr,faidx_t *seq_ref,int len_limit,double lambda,double delta,double delta_s,double nv,double Tol,int &ndim,int model);
double **read_all_reads(samFile *in,sam_hdr_t *hdr,faidx_t *seq_ref,int len_limit,double lambda,double delta,double delta_s,double nv,double Tol,int &ndim,int model,int l_check);
#endif

0 comments on commit 8dc816d

Please sign in to comment.