Skip to content

Commit

Permalink
add put_gap_at_end; bugs in sub_aln & n_span_read
Browse files Browse the repository at this point in the history
  • Loading branch information
yangao07 committed Jan 28, 2025
1 parent e6bb6fd commit 6670bbe
Show file tree
Hide file tree
Showing 7 changed files with 14 additions and 10 deletions.
3 changes: 2 additions & 1 deletion example.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,9 @@ int main(void) {
abpt->out_msa = 1; // generate Row-Column multiple sequence alignment(RC-MSA), set 0 to disable
abpt->out_cons = 1; // generate consensus sequence, set 0 to disable
abpt->w = 6, abpt->k = 9; abpt->min_w = 10; // minimizer-based seeding and partition
abpt->progressive_poa = 1;
abpt->progressive_poa = 0;
abpt->max_n_cons = 2; // to generate 2 consensus sequences
// abpt->sub_aln = 1;

abpoa_post_set_para(abpt);

Expand Down
3 changes: 1 addition & 2 deletions include/abpoa.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ typedef struct {
int use_score_matrix; // set _mat_ based on score matrix file, then _match_/_mismatch_ is not used.
int match, max_mat, mismatch, min_mis, gap_open1, gap_open2, gap_ext1, gap_ext2; int inf_min;
int sort_input_seq; // sort input sequences by length, in descending order
int put_gap_on_right; // put indel on the left-most side of the alignment: minimap2-like, or right-most: wfa2-like
int inc_path_score; // set mismatch/match score based on node weight, i.e., # covered reads * match/mismatch
// minimizer seeding parameter
int k, w, min_w;
Expand All @@ -79,7 +78,7 @@ typedef struct {
// alignment mode
uint8_t ret_cigar:1, rev_cigar:1, out_msa:1, out_cons:1, out_gfa:1, out_fq:1, use_read_ids:1, amb_strand:1;
// sub_aln: reads align to subgraph, total read count is based on subgraph coverage, i.e., node.n_span_read, not total input read count
uint8_t sub_aln:1, use_qv:1, disable_seeding:1, progressive_poa:1;
uint8_t sub_aln:1, use_qv:1, disable_seeding:1, progressive_poa:1, put_gap_on_right:, put_gap_at_end:1; // put indel on the left-most side of the alignment: minimap2-like, or right-most: wfa2-like
char *incr_fn, *out_pog;
int align_mode, gap_mode, max_n_cons, cons_algrm; // consensus calling algorithm: 0: partial order graph, 1: majority voting
double min_freq; // for multiploid data
Expand Down
3 changes: 3 additions & 0 deletions src/abpoa.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ const struct option abpoa_long_opt [] = {
{ "inc-path-score", 0, NULL, 'G'},
{ "sort-by-len", 0, NULL, 'L'},
{ "gap-on-right", 0, NULL, 'R' },
{ "gap-at-end", 0, NULL, 'J' },
{ "use-qual-weight", 0, NULL, 'Q'},
{ "amino-acid", 0, NULL, 'c'},
{ "in-list", 0, NULL, 'l' },
Expand Down Expand Up @@ -96,6 +97,7 @@ int abpoa_usage(void)
err_printf(" -G --inc-path-score include log-scaled path score for graph alignment [False]\n");
err_printf(" -L --sort-by-len sort input sequences by length in descending order [False]\n");
err_printf(" -R --gap-on-right put indel on the right-most side of the alignment, default is left [False]\n");
err_printf(" -J --gap-at-end always put indel at the end of the alignment if possible [False]\n");
// err_printf(" -z --zdrop INT Z-drop score in extension alignment [-1]\n");
// err_printf(" set as <= 0 to disable Z-drop extension\n");
// err_printf(" -e --bonus INT end bonus score in extension alignment [-1]\n");
Expand Down Expand Up @@ -178,6 +180,7 @@ int main(int argc, char **argv) {
case 'G': abpt->inc_path_score = 1; break;
case 'L': abpt->sort_input_seq = 1; break;
case 'R': abpt->put_gap_on_right = 1; break;
case 'J': abpt->put_gap_at_end = 1; break;
case 'b': abpt->wb = atoi(optarg); break;
case 'f': abpt->wf = atof(optarg); break;
case 'z': abpt->zdrop = atoi(optarg); break;
Expand Down
3 changes: 1 addition & 2 deletions src/abpoa.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ typedef struct {
int use_score_matrix; // set _mat_ based on score matrix file, then _match_/_mismatch_ is not used.
int match, max_mat, mismatch, min_mis, gap_open1, gap_open2, gap_ext1, gap_ext2; int inf_min;
int sort_input_seq; // sort input sequences by length, in descending order
int put_gap_on_right; // put indel on the left-most side of the alignment: minimap2-like, or right-most: wfa2-like
int inc_path_score; // set mismatch/match score based on node weight, i.e., # covered reads * match/mismatch
// minimizer seeding parameter
int k, w, min_w;
Expand All @@ -79,7 +78,7 @@ typedef struct {
// alignment mode
uint8_t ret_cigar:1, rev_cigar:1, out_msa:1, out_cons:1, out_gfa:1, out_fq:1, use_read_ids:1, amb_strand:1;
// sub_aln: reads align to subgraph, total read count is based on subgraph coverage, i.e., node.n_span_read, not total input read count
uint8_t sub_aln:1, use_qv:1, disable_seeding:1, progressive_poa:1;
uint8_t sub_aln:1, use_qv:1, disable_seeding:1, progressive_poa:1, put_gap_on_right:, put_gap_at_end:1; // put indel on the left-most side of the alignment: minimap2-like, or right-most: wfa2-like
char *incr_fn, *out_pog;
int align_mode, gap_mode, max_n_cons, cons_algrm; // consensus calling algorithm: 0: partial order graph, 1: majority voting
double min_freq; // for multiploid data
Expand Down
1 change: 1 addition & 0 deletions src/abpoa_align.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ abpoa_para_t *abpoa_init_para(void) {
abpt->inc_path_score = 0;
abpt->sort_input_seq = 0;
abpt->put_gap_on_right = 0;
abpt->put_gap_at_end = 0;

abpt->wb = ABPOA_EXTRA_B; // extra bandwidth
abpt->wf = ABPOA_EXTRA_F; // extra bandwidth
Expand Down
3 changes: 2 additions & 1 deletion src/abpoa_graph.c
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,7 @@ int abpoa_add_graph_edge(abpoa_graph_t *abg, int from_id, int to_id, int check_e
return ret;
}

// XXX n_span_read should be updated after adding all edges/nodes in the graph, right before topological sort/consensus calling
void abpoa_update_node_n_span_reads(abpoa_graph_t *abg, int src_id, int sink_id, int inc_both_ends) {
int src_index = abg->node_id_to_index[src_id];
int sink_index = abg->node_id_to_index[sink_id];
Expand Down Expand Up @@ -759,7 +760,7 @@ int abpoa_add_subgraph_alignment(abpoa_t *ab, abpoa_para_t *abpt, int beg_node_i
// abpoa_add_graph_edge(abg, last_id, end_node_id, 1-last_new, w, add_read_id&add, read_id, read_ids_n);
abpoa_add_graph_edge(abg, last_id, end_node_id, 1-last_new, weight[seq_l-1], add_read_id, add_read_weight, read_id, read_ids_n, tot_read_n);
if (inc_both_ends == 0) abg->node[last_id].n_read--;
abg->is_called_cons = abg->is_topological_sorted = 0;
abg->is_called_cons = abg->is_set_msa_rank = abg->is_topological_sorted = 0;
abpoa_topological_sort(abg, abpt);
// update node.n_read
abpoa_update_node_n_span_reads(abg, beg_node_id, end_node_id, inc_both_ends);
Expand Down
8 changes: 4 additions & 4 deletions src/simd_abpoa_align.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
id = abpoa_graph_index_to_node_id(graph, i+beg_index); \
if (best_j < qlen) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, qlen-j, -1, qlen-1); \
dp_h = DP_H + i * dp_sn; _dp_h = (score_t*)dp_h; int path_score = 0; \
int look_for_first_gap_at_end = 1; /* prefer to keep the last gap at end (begining of the backtrack) */ \
int look_for_first_gap_at_end = abpt->put_gap_at_end; /* prefer to keep the last gap at end (begining of the backtrack) */ \
int put_gap_on_right = abpt->put_gap_on_right; \
while (i > 0 && j > 0) { \
if (abpt->align_mode == ABPOA_LOCAL_MODE && _dp_h[j] == 0) break; \
Expand Down Expand Up @@ -202,7 +202,7 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
id = abpoa_graph_index_to_node_id(graph, i+beg_index); \
if (best_j < qlen) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, qlen-j, -1, qlen-1); \
SIMDi *dp_h = DP_HEF + dp_sn * i * 3; _dp_h = (score_t*)dp_h; int path_score = 0; \
int look_for_first_gap_at_end = 1; /* prefer to keep the last gap at end (begining of the backtrack) */ \
int look_for_first_gap_at_end = abpt->put_gap_at_end; /* prefer to keep the last gap at end (begining of the backtrack) */ \
int put_gap_on_right = abpt->put_gap_on_right; \
while (i > 0 && j > 0) { \
if (abpt->align_mode == ABPOA_LOCAL_MODE && _dp_h[j] == 0) break; \
Expand Down Expand Up @@ -316,7 +316,7 @@ void simd_output_pre_nodes(int *pre_index, int pre_n, int dp_i, int dp_j, int cu
id = abpoa_graph_index_to_node_id(graph, i+beg_index); \
if (best_j < qlen) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, qlen-best_j, -1, qlen-1); \
SIMDi *dp_h = DP_H2E2F + dp_sn * i * 5; _dp_h = (score_t*)dp_h; int path_score = 0; \
int look_for_first_gap_at_end = 1; /* prefer to keep the last gap at the end (begining of backtrack) */ \
int look_for_first_gap_at_end = abpt->put_gap_at_end; /* prefer to keep the last gap at the end (begining of backtrack) */ \
int put_gap_on_right = abpt->put_gap_on_right; /* prefer to keep gaps at the left-most position, minimap2-like */ \
while (i > 0 && j > 0) { \
if (abpt->align_mode == ABPOA_LOCAL_MODE && _dp_h[j] == 0) break; \
Expand Down Expand Up @@ -1528,7 +1528,7 @@ void abpoa_cg_backtrack(SIMDi *DP_H2E2F, int **pre_index, int *pre_n, int *dp_be
id = abpoa_graph_index_to_node_id(graph, dp_i+beg_index);
if (best_dp_j < qlen) cigar = abpoa_push_cigar(&n_c, &m_c, cigar, ABPOA_CINS, qlen-best_dp_j, -1, qlen-1);
dp_h = DP_H2E2F + dp_sn * (dp_i * 5); _dp_h = (int32_t*)dp_h;
int look_for_first_gap_at_end = 1; /* prefer to keep the last gap at end (begining of the backtrack) */
int look_for_first_gap_at_end = abpt->put_gap_at_end; /* prefer to keep the last gap at end (begining of the backtrack) */
int put_gap_on_right = abpt->put_gap_on_right; /* prefer to keep gaps at the left-most position, minimap2-like */
int path_score = 0;
while (dp_i > 0 && dp_j > 0) {
Expand Down

0 comments on commit 6670bbe

Please sign in to comment.