From 120e85662b4a27b020091de9ce316bf42b0d8768 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 6 Mar 2024 08:37:41 -0500 Subject: [PATCH] r246: all translation tables Thanks to @giacomomutti in #57 Tables 7-8 and 17-20 have no codon string and are kept as 0 placeholders. The existing table-5 initializer gains its missing trailing comma so that it is not concatenated with the new table-6 string literal, which would shift every later table down by one index. --- main.c | 2 +- miniprot.1 | 2 +- miniprot.h | 2 +- nasw-tab.c | 32 ++++++++++++++++++++++++++++++-- 4 files changed, 33 insertions(+), 5 deletions(-) diff --git a/main.c b/main.c index a732880..456691f 100644 --- a/main.c +++ b/main.c @@ -55,7 +55,7 @@ static void print_usage(FILE *fp, const mp_idxopt_t *io, const mp_mapopt_t *mo, fprintf(fp, " -k INT k-mer size [%d]\n", io->kmer); fprintf(fp, " -M INT modimisers bit (sample rate = 1/2**M) [%d]\n", io->mod_bit); fprintf(fp, " -L INT min ORF length to index [%d]\n", io->min_aa_len); - fprintf(fp, " -T INT NCBI translation table (from 1 to 5) [%d]\n", io->trans_code); + fprintf(fp, " -T INT NCBI translation table (1 through 33 except 7-8 and 17-20) [%d]\n", io->trans_code); fprintf(fp, " -b INT bits per block [%d]\n", io->bbit); fprintf(fp, " -d FILE save index to FILE []\n"); fprintf(fp, " Mapping:\n"); diff --git a/miniprot.1 b/miniprot.1 index 19506e5..3efa2b7 100644 --- a/miniprot.1 +++ b/miniprot.1 @@ -48,7 +48,7 @@ Sample k-mers at a rate Minimum ORF length to index [30] .TP .BI -T \ INT -NCBI translation table (1 through 5) [1] +NCBI translation table (1 through 33 except 7-8 and 17-20) [1] .TP .BI -b \ INT Number of bits per bin [8]. Miniprot splits the genome into non-overlapping bins of 2^8 bp in size. 
diff --git a/miniprot.h b/miniprot.h index 91b07ac..0180d7f 100644 --- a/miniprot.h +++ b/miniprot.h @@ -3,7 +3,7 @@ #include -#define MP_VERSION "0.12-r245-dirty" +#define MP_VERSION "0.12-r246-dirty" #define MP_F_NO_SPLICE 0x1 #define MP_F_NO_ALIGN 0x2 diff --git a/nasw-tab.c b/nasw-tab.c index f661429..b86ce6d 100644 --- a/nasw-tab.c +++ b/nasw-tab.c @@ -12,7 +12,7 @@ char *ns_tab_aa_i2c = "ARNDCQEGHILKMFPSTWYV*X"; uint8_t ns_tab_a2r[22] = { 0, 2, 4, 4, 6, 5, 5, 8, 3, 10, 11, 2, 11, 12, 7, 1, 1, 13, 12, 10, 14, 15 }; // A R N D C Q E G H I L K M F P S T W Y V * X -#define NS_MAX_TRANS_CODE 5 +#define NS_MAX_TRANS_CODE 33 static const char *ns_tab_codon_all[NS_MAX_TRANS_CODE + 1] = { 0, // 0123456789012345678901234567890123456789012345678901234567890123 @@ -24,6 +24,34 @@ static const char *ns_tab_codon_all[NS_MAX_TRANS_CODE + 1] = { "KNKNTTTTRSRSMIMIQHQHPPPPRRRRTTTTEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLFX", // 3: The Yeast Mitochondrial Code "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLFX", // 4: The Mold, Protozoan, and Coelenterate Mitochondrial Code and the Mycoplasma/Spiroplasma Code - "KNKNTTTTSSSSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLFX" // 5: The Invertebrate Mitochondrial Code + "KNKNTTTTSSSSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLFX", // 5: The Invertebrate Mitochondrial Code + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVQYQYSSSS*CWCLFLFX", // 6: Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear + 0, // 7 + 0, // 8 + "NNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLFX", // 9: Echinoderm Mitochondrial; Flatworm Mitochondrial + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSCCWCLFLFX", // 10: Euplotid Nuclear + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLFX", // 11: Bacterial, Archaeal and Plant Plastid + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLSLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLFX", // 12: Alternative Yeast Nuclear + "KNKNTTTTGSGSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLFX", // 13: Ascidian Mitochondrial + "NNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYY*YSSSSWCWCLFLFX", // 
14: Alternative Flatworm Mitochondrial + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YQYSSSS*CWCLFLFX", // 15: Blepharisma Macronuclear + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YLYSSSS*CWCLFLFX", // 16: Chlorophycean Mitochondrial + 0, // 17 + 0, // 18 + 0, // 19 + 0, // 20 + "NNKNTTTTSSSSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLFX", // 21: Trematode Mitochondrial + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YLY*SSS*CWCLFLFX", // 22: Scenedesmus obliquus Mitochondrial + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWC*FLFX", // 23: Thraustochytrium Mitochondrial + "KNKNTTTTSSKSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLFX", // 24: Rhabdopleuridae Mitochondrial + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSGCWCLFLFX", // 25: Candidate Division SR1 and Gracilibacteria + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLALEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLFX", // 26: Pachysolen tannophilus Nuclear + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVQYQYSSSSWCWCLFLFX", // 27: Karyorelict Nuclear + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVQYQYSSSSWCWCLFLFX", // 28: Condylostoma Nuclear + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYYYSSSS*CWCLFLFX", // 29: Mesodinium Nuclear + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVEYEYSSSS*CWCLFLFX", // 30: Peritrich Nuclear + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVEYEYSSSSWCWCLFLFX", // 31: Blastocrithidia Nuclear + "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YWYSSSS*CWCLFLFX", // 32: Balanophoraceae Plastid + "KNKNTTTTSSKSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYY*YSSSSWCWCLFLFX", // 33: Cephalodiscidae Mitochondrial }; uint8_t ns_tab_nt4[256], ns_tab_aa20[256], ns_tab_aa13[256], ns_tab_codon[64], ns_tab_codon13[64];