Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/ARUP-NGS/BMFtools into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Baker committed May 20, 2016
2 parents ec32c9a + f5064dc commit 19c32af
Show file tree
Hide file tree
Showing 14 changed files with 116 additions and 118 deletions.
2 changes: 2 additions & 0 deletions MANUAL.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ If you're not interested in taking advantage of the barcode metadata (new p valu

For more specific use cases, postprocessing steps (such as cap and filter) can be used to prepare a bam for use by BMF-agnostic tools, and variant calling can be performed or vetted with `bmftools stack` or `bmftools vet`.

For translocation detection, soft-clippings are often used as markers for potential events. To improve performance of such tools, (e.g., WHAM), if adapters sequences have been masked, successive masked bases at the ends of reads can be removed by [maskripper](https://github.com/noseatbelts/maskripper). For rescue to work properly, however, this should be performed only after rescue has been completed.

##Tools


Expand Down
34 changes: 16 additions & 18 deletions lib/stack.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ namespace BMF {
rv += bam_itag(plp.b, "RV");
} else if(base1 == 'N') {
discordant = 0;
// Basically, throw out
base_call = base2;
agreed = ((uint32_t *)dlib::array_tag(plp.b, "FA"))[cycle2];
quality = ((uint32_t *)dlib::array_tag(plp.b, "PV"))[cycle2];
Expand Down Expand Up @@ -134,7 +133,7 @@ namespace BMF {
// auto without "reference" because auto is pointers.
for(auto uni: tumor.templates[refbase]) {
if(uni->get_quality() < aux->conf.minPV || uni->get_agreed() < aux->conf.minFA
|| (float)uni->get_agreed() / uni->get_size() < aux->conf.minFR) {
|| (float)uni->get_agreed() / uni->get_size() < aux->conf.min_fr) {
uni->set_pass(0);
++failed_counts[0];
continue;
Expand All @@ -143,17 +142,17 @@ namespace BMF {
overlap_counts[0] += uni->get_overlap();
reverse_counts[0] += uni->get_reverse();
qscore_sums[0] += uni->get_quality();
allele_passes[0] = (duplex_counts[0] >= aux->conf.minDuplex &&
counts[0] >= aux->conf.minCount &&
overlap_counts[0] >= aux->conf.minOverlap);
allele_passes[0] = (duplex_counts[0] >= aux->conf.min_duplex &&
counts[0] >= aux->conf.min_count &&
overlap_counts[0] >= aux->conf.min_overlap);
}
}
if((match = normal.templates.find(refbase)) != normal.templates.end()) {
counts[n_base_calls] = match->second.size();
// auto without "reference" because auto is pointers.
for(auto uni: normal.templates[refbase]) {
if(uni->get_quality() < aux->conf.minPV || uni->get_agreed() < aux->conf.minFA
|| (float)uni->get_agreed() / uni->get_size() < aux->conf.minFR) {
|| (float)uni->get_agreed() / uni->get_size() < aux->conf.min_fr) {
uni->set_pass(0);
++failed_counts[n_base_calls];
continue;
Expand All @@ -163,9 +162,9 @@ namespace BMF {
reverse_counts[n_base_calls] += uni->get_reverse();
qscore_sums[n_base_calls] += uni->get_quality();
}
allele_passes[n_base_calls] = (duplex_counts[n_base_calls] >= aux->conf.minDuplex &&
counts[n_base_calls] >= aux->conf.minCount &&
overlap_counts[n_base_calls] >= aux->conf.minOverlap);
allele_passes[n_base_calls] = (duplex_counts[n_base_calls] >= aux->conf.min_duplex &&
counts[n_base_calls] >= aux->conf.min_count &&
overlap_counts[n_base_calls] >= aux->conf.min_overlap);
}
kstring_t allele_str = {0, 0, nullptr};
ks_resize(&allele_str, 8uL);
Expand All @@ -176,7 +175,7 @@ namespace BMF {
counts[i + n_base_calls] = match->second.size();
for(auto uni: normal.templates[base_calls[i]]) {
if(uni->get_quality() < aux->conf.minPV || uni->get_agreed() < aux->conf.minFA
|| (float)uni->get_agreed() / uni->get_size() < aux->conf.minFR) {
|| (float)uni->get_agreed() / uni->get_size() < aux->conf.min_fr) {
uni->set_pass(0);
++failed_counts[i];
continue;
Expand All @@ -186,15 +185,15 @@ namespace BMF {
reverse_counts[i + n_base_calls] += uni->get_reverse();
qscore_sums[i + n_base_calls] += uni->get_quality();
}
allele_passes[i + n_base_calls] = (duplex_counts[i + n_base_calls] >= aux->conf.minDuplex &&
counts[i + n_base_calls] >= aux->conf.minCount &&
overlap_counts[i + n_base_calls] >= aux->conf.minOverlap);
allele_passes[i + n_base_calls] = (duplex_counts[i + n_base_calls] >= aux->conf.min_duplex &&
counts[i + n_base_calls] >= aux->conf.min_count &&
overlap_counts[i + n_base_calls] >= aux->conf.min_overlap);
}
if((match = tumor.templates.find(base_calls[i])) != tumor.templates.end()) {
counts[i] = match->second.size();
for(auto uni: tumor.templates[base_calls[i]]) {
if(uni->get_quality() < aux->conf.minPV || uni->get_agreed() < aux->conf.minFA
|| (float)uni->get_agreed() / uni->get_size() < aux->conf.minFR) {
|| (float)uni->get_agreed() / uni->get_size() < aux->conf.min_fr) {
uni->set_pass(0);
++failed_counts[i];
continue;
Expand All @@ -204,11 +203,10 @@ namespace BMF {
reverse_counts[i] += uni->get_reverse();
qscore_sums[i] += uni->get_quality();
}
allele_passes[i] = (duplex_counts[i] >= aux->conf.minDuplex &&
counts[i] >= aux->conf.minCount &&
overlap_counts[i] >= aux->conf.minOverlap);
allele_passes[i] = (duplex_counts[i] >= aux->conf.min_duplex &&
counts[i] >= aux->conf.min_count &&
overlap_counts[i] >= aux->conf.min_overlap);
if(allele_passes[i] && !allele_passes[i + n_base_calls]) {
bcf_update_info_flag(aux->vcf.vh, vrec, "SOMATIC", nullptr, 1);
somatic[i] = 1;
}
}
Expand Down
12 changes: 5 additions & 7 deletions lib/stack.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,20 @@ static const char *stack_vcf_lines[] = {
"##FORMAT=<ID=RVF,Number=R,Type=Float,Description=\"Fraction of reads supporting allele which were reversed.\">",
"##FORMAT=<ID=QSS,Number=R,Type=Integer,Description=\"Q Score Sum for each allele for each sample.\">",
"##FORMAT=<ID=AMBIG,Number=1,Type=Integer,Description=\"Number of ambiguous (N) base calls at position.\">",
"##INFO=<ID=SOMATIC_PV,Number=R,Type=Float,Description=\"P value for a somatic call for each allele.\">",
"##INFO=<ID=SOMATIC_CALL,Number=R,Type=Integer,Description=\"Boolean value for a somatic call for each allele.\">",
"##INFO=<ID=SOMATIC,Number=0,Type=Flag,Description=\"Somatic mutation\">"
};

struct stack_conf_t {
float minFR; // Minimum fraction of family members agreed on base
float min_fr; // Minimum fraction of family members agreed on base
float minAF; // Minimum aligned fraction
int max_depth;
uint32_t minFM;
uint32_t minFA;
uint32_t minPV;
uint32_t minMQ;
int minCount;
int minDuplex;
int minOverlap;
uint32_t minmq;
int min_count;
int min_duplex;
int min_overlap;
int skip_improper;
uint32_t skip_flag; // Skip reads with any bits set to true
int output_bcf;
Expand Down
16 changes: 9 additions & 7 deletions src/bmf_depth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ namespace BMF {
std::vector<uint64_t> dmp_counts; // Counts for dmp observations along region
std::vector<uint64_t> singleton_counts; // Counts for singleton observations along region
uint32_t minFM:16; // Minimum family size
uint32_t minMQ:15; // Minimum mapping quality
uint32_t minmq:15; // Minimum mapping quality
uint32_t requireFP:1; // Set to true to require
khash_t(depth) *depth_hash;
uint64_t n_analyzed;
Expand Down Expand Up @@ -153,7 +153,7 @@ namespace BMF {
/*
* Reads from the bam, filtering based on settings in depth_aux_t.
* It fails unmapped/secondary/qcfail/pcr duplicate reads, as well as those
* with mapping qualities below minMQ and those with family sizes below minFM.
* with mapping qualities below minmq and those with family sizes below minFM.
* If requireFP is set, it also fails any with an FP:i:0 tag.
* If an FM tag is not found, reads are not filtered by family size.
* Similarly, if an FP tag is not found, reads are passed.
Expand All @@ -169,7 +169,7 @@ namespace BMF {
if ( ret<0 ) break;
uint8_t *data = bam_aux_get(b, "FM"), *fpdata = bam_aux_get(b, "FP");
if ((b->core.flag & (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)) ||
b->core.qual < aux->minMQ || (data && bam_aux2i(data) < aux->minFM) ||
b->core.qual < aux->minmq || (data && bam_aux2i(data) < aux->minFM) ||
(aux->requireFP && fpdata && bam_aux2i(fpdata) == 0))
continue;
break;
Expand All @@ -183,7 +183,7 @@ namespace BMF {
kstream_t *ks;
hts_idx_t **idx;
depth_aux_t **aux;
int *n_plp, dret, i, n, c, minMQ = 0;
int *n_plp, dret, i, n, c, minmq = 0;
uint64_t *counts;
const bam_pileup1_t **plp;
int usage = 0, max_depth = DEFAULT_MAX_DEPTH, minFM = 0, n_quantiles = 4, padding = DEFAULT_PADDING, khr;
Expand All @@ -202,7 +202,7 @@ namespace BMF {
LOG_INFO("Writing output histogram to '%s'\n", optarg);
histfp = fopen(optarg, "w");
break;
case 'Q': minMQ = atoi(optarg); break;
case 'Q': minmq = atoi(optarg); break;
case 'b': bedpath = strdup(optarg); break;
case 'm': max_depth = atoi(optarg); break;
case 'f': minFM = atoi(optarg); break;
Expand All @@ -224,7 +224,7 @@ namespace BMF {
idx = (hts_idx_t **)calloc(n, sizeof(hts_idx_t*));
for (i = 0; i < n; ++i) {
aux[i] = (depth_aux_t *)calloc(1, sizeof(depth_aux_t));
aux[i]->minMQ = minMQ;
aux[i]->minmq = minmq;
aux[i]->minFM = minFM;
aux[i]->requireFP = requireFP;
aux[i]->fp = sam_open(argv[i + optind], "r");
Expand Down Expand Up @@ -261,7 +261,7 @@ namespace BMF {
kstring_t hdr_str{0, 0, nullptr};
ksprintf(&hdr_str, "##bed=%s\n", bedpath);
ksprintf(&hdr_str, "##NQuintiles=%i\n", n_quantiles);
ksprintf(&hdr_str, "##minMQ=%i\n", minMQ);
ksprintf(&hdr_str, "##minmq=%i\n", minmq);
ksprintf(&hdr_str, "##minFM=%i\n", minFM);
ksprintf(&hdr_str, "##padding=%i\n", padding);
ksprintf(&hdr_str, "##BMFtools version=%s.\n", BMF_VERSION);
Expand All @@ -271,7 +271,9 @@ namespace BMF {
std::vector<uint64_t> singleton_capture_counts(n);
kstring_t cov_str{0, 0, nullptr};
kstring_t str{0};
#if !NDEBUG
const int n_lines = dlib::count_bed_lines(bedpath);
#endif
while (ks_getuntil(ks, KS_SEP_LINE, &str, &dret) >= 0) {
char *p, *q;
int tid, start, stop, pos, region_len, arr_ind;
Expand Down
Loading

0 comments on commit 19c32af

Please sign in to comment.