diff --git a/docs/usage.rst b/docs/usage.rst index ca4d227..e1d251f 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -31,7 +31,7 @@ You can also iterate over STRFinder object to get exact tandem repeat (ETR) obje >>> for ssr in pytrf.STRFinder(name, seq): >>> print(ssr.chrom) >>> print(ssr.motif) - >>> print(ssr.repeats) + >>> print(ssr.repeat) You can define the minimum number of repeats required to determine a SSR. @@ -70,13 +70,13 @@ Iterate over GTRFinder object to get ETR object >>> for gtr in pytrf.GTRMiner(name, seq): >>> print(gtr.chrom) >>> print(gtr.motif) - >>> print(gtr.repeats) + >>> print(gtr.repeat) You can customize the motif size, minimum repeat and minimum length. .. code:: python - >>> gtrs = pytrf.GTRFinder(name, seq, max_motif=100, min_repeat=3, min_length=10) + >>> gtrs = pytrf.GTRFinder(name, seq, min_motif=20, max_motif=100, min_repeat=3, min_length=10) A complete example, get all gtrs and output csv format @@ -84,7 +84,7 @@ A complete example, get all gtrs and output csv format >>> fa = pyfastx.Fastx('tests/data/test.fa', uppercase=True): >>> for name, seq in fa: - >>> for gtr in pytrf.GTRFinder(name, seq, 100, 2, 10): + >>> for gtr in pytrf.GTRFinder(name, seq, 30, 100, 2, 10): >>> print(vntr.as_string(',')) Exact tandem repeat @@ -113,7 +113,7 @@ ETR is a readonly object and allows you to access the attributes and convert to >>> ssr.motif >>> # get number of repeats - >>> ssr.repeats + >>> ssr.repeat >>> # get repeat length >>> ssr.length @@ -279,7 +279,7 @@ Find exact generic tandem repeats (GTRs) from fasta/q file. pytrf gtrfinder -h - usage: pytrf findgtr [-h] [-o] [-f] [-m] [-r] [-l] fastx + usage: pytrf findgtr [-h] [-o] [-f] [-m] [-M] [-r] [-l] fastx positional arguments: fastx input fasta or fastq file (gzip support) @@ -288,7 +288,8 @@ Find exact generic tandem repeats (GTRs) from fasta/q file. -h, --help show this help message and exit -o , --out-file output file (default: stdout) -f , --out-format output format, tsv, csv or gff (default: tsv) - -m , --max-motif maximum motif length (default: 30) + -m , --min-motif minimum motif length (default: 10) + -M , --max-motif maximum motif length (default: 100) -r , --min-repeat minimum repeat number (default: 3) -l , --min-length minimum repeat length (default: 10) @@ -298,7 +299,7 @@ Find imperfect or approximate tandem repeats (ATRs) pytrf atrfinder -h - usage: pytrf findatr [-h] [-o] [-f] [-m] [-r] [-l] [-e] [-p] [-x] fastx + usage: pytrf findatr [-h] [-o] [-f] [-m] [-M] [-r] [-l] [-e] [-p] [-x] fastx positional arguments: fastx input fasta or fastq file (gzip support) @@ -307,7 +308,9 @@ Find imperfect or approximate tandem repeats (ATRs) -h, --help show this help message and exit -o , --out-file output file (default: stdout) -f , --out-format output format, tsv, csv or gff (default: tsv) - -m , --max-motif-size + -m , --min-motif-size + minimum motif length (default: 1) + -M , --max-motif-size maximum motif length (default: 6) -r , --min-seed-repeat minimum repeat number for seed (default: 3) @@ -315,7 +318,7 @@ Find imperfect or approximate tandem repeats (ATRs) minimum length for seed (default: 10) -e , --max-continuous-error maximum number of continuous alignment errors (default: 3) - -p , --min-identity minimum identity from 0 to 100 (default: 70) + -p , --min-identity minimum identity from 0 to 1 (default: 0.7) -x , --max-extend-length maximum length allowed to extend (default: 2000) diff --git a/pytrfcli.py b/pytrfcli.py index 44fcc7e..d9e6772 100644 --- a/pytrfcli.py +++ b/pytrfcli.py @@ -23,12 +23,12 @@ def str_finder(seq, minrep, outfmt, outfw): ssrs = pytrf.STRFinder(seq[0], seq[1], *minrep) get_format_result(ssrs, outfmt, outfw) -def gtr_finder(seq, maxmotif, minrep, minlen, outfmt, outfw): - gtrs = pytrf.GTRFinder(seq[0], seq[1], maxmotif, minrep, minlen) +def gtr_finder(seq, minmotif, maxmotif, minrep, minlen, outfmt, outfw): + gtrs = pytrf.GTRFinder(seq[0], seq[1], minmotif, maxmotif, minrep, minlen) get_format_result(gtrs, outfmt, outfw) -def atr_finder(seq, maxmotif, seedrep, seedlen, maxerror, minscore, maxextend, outfmt, outfw): - atrs = pytrf.ATRFinder(seq[0], seq[1], maxmotif, seedrep, seedlen, maxerror, minscore, maxextend) +def atr_finder(seq, minmotif, maxmotif, seedrep, seedlen, maxerror, minscore, maxextend, outfmt, outfw): + atrs = pytrf.ATRFinder(seq[0], seq[1], minmotif, maxmotif, seedrep, seedlen, maxerror, minscore, maxextend) get_format_result(atrs, outfmt, outfw) def tandem_repeat_finder(args): @@ -41,6 +41,7 @@ def tandem_repeat_finder(args): elif args.cmd == 'gtr': return functools.partial(gtr_finder, + minmotif = args.min_motif, maxmotif = args.max_motif, minrep = args.min_repeat, minlen = args.min_length, @@ -50,6 +51,7 @@ def tandem_repeat_finder(args): elif args.cmd == 'atr': return functools.partial(atr_finder, + minmotif = args.min_motif_size, maxmotif = args.max_motif_size, seedrep = args.min_seed_repeat, seedlen = args.min_seed_length, @@ -161,11 +163,18 @@ def main(): ) parser_gtrfinder.set_defaults(cmd='gtr') - parser_gtrfinder.add_argument('-m', '--max-motif', + parser_gtrfinder.add_argument('-m', '--min-motif', + default = 10, + metavar = '', + type = int, + help = "minimum motif length (default: 10)" + ) + + parser_gtrfinder.add_argument('-M', '--max-motif', default = 30, metavar = '', type = int, - help = "maximum motif length (default: 30)" + help = "maximum motif length (default: 100)" ) parser_gtrfinder.add_argument('-r', '--min-repeat', @@ -189,7 +198,14 @@ def main(): ) parser_atrfinder.set_defaults(cmd='atr') - parser_atrfinder.add_argument('-m', '--max-motif-size', + parser_atrfinder.add_argument('-m', '--min-motif-size', + default = 1, + metavar = '', + type = int, + help = "minimum motif length (default: 1)" + ) + + parser_atrfinder.add_argument('-M', '--max-motif-size', default = 6, metavar = '', type = int,